Notebook to reproduce the results of:

Bernard E, Nannya Y, Hasserjian P.H, et al. 

Implications of TP53 Allelic State for Genome Stability, Clinical Presentation and Outcomes in Myelodysplastic Syndromes

Load PackagesΒΆ

Load below the used R packages.

In [189]:
library(ggplot2)
library(reshape2)
library(gridExtra)
library(survival)
library(survminer)
library(cmprsk)
library(stringr)

Read DataframesΒΆ

In this section we read the clinical and molecular dataframes from the data folder.

In [190]:
# Clinical data with cytogenetics and NGS-derived copy-number results
# With detailed analysis of the status of chr17 at the TP53 locus
dd = read.table("./data/dd_clinical_cyto_chr17.tsv", header=T, sep="\t", stringsAsFactors=F)
In [191]:
paste("Total number of patients is", nrow(dd))
'Total number of patients is 3324'
In [192]:
# Binary matrix of chromosomal alterations
# Patients in row and alterations in columns
ddcyto = read.table("./data/dd_matrix_binary_cyto.tsv", header=T, sep="\t", stringsAsFactors=F)
if(identical(dd$LEUKID,ddcyto$LEUKID)) print("all good with IDs")
[1] "all good with IDs"
In [193]:
ddcyto[c(1,9,151),1:5]
A data.frame: 3 Γ— 5
LEUKIDdel5qplus8del7del20q
<chr><int><int><int><int>
1I-H-132697-T1-1-D1-10001
9I-H-132704-T1-1-D1-11011
151E-H-117425-T1-1-D1-11010
In [194]:
# TP53 mutation file
# One line per mutation per patient
mm = read.table("./data/maf_TP53.tsv", header=T, sep="\t", stringsAsFactors=F)
In [195]:
head(mm,n=3)
A data.frame: 3 Γ— 18
TARGET_NAMECHRSTARTENDREFALTvariant.keyGENEcDNA_CHANGEPROTEIN_CHANGEaa_numberVAG_VTEFFECTVAFDEPTHCALLED_BYdetail_statusallelic_status
<chr><int><int><int><chr><chr><chr><chr><chr><chr><int><chr><chr><dbl><int><chr><chr><chr>
1E-H-100002-T1-1-D1-11775795337579533GA17_7579533_G_ATP53c.154C>T p.Q52* 52Substop_gained 0.597 487C,S,Mmut+cnlohmulti
2E-H-100010-T1-1-D1-11775771207577120CT17_7577120_C_TTP53c.818G>A p.R273H273Subnon_synonymous_codon0.869 510C,S,Mmut+del multi
3E-H-100015-T1-1-D1-11775785567578556TA17_7578556_T_ATP53c.376-2A>Tp.? NASubsplice_site_variant 0.8401028C mut+cnlohmulti
In [196]:
paste("Total number of TP53 mutations is", nrow(mm), "from", length(unique(mm$TARGET_NAME)),"TP53-mutated patients")
'Total number of TP53 mutations is 486 from 378 TP53-mutated patients'

Get ReadyΒΆ

In [197]:
# Some ggstyles
themePP = theme_classic() + theme(text=element_text(size=25,colour="black"),
                                 axis.text=element_text(size=25,colour="black"),
                                 axis.title=element_text(size=25,colour="black"),
                                 strip.text.x = element_text(size=25),
                                 legend.text=element_text(size=25)
                                )
noleg = theme(legend.position = "none")
topleg = theme(legend.position = "top")
angle45 = theme(axis.text.x = element_text(angle = 45, hjust = 1))
noxtitle = theme(axis.title.x = element_blank())
noytitle = theme(axis.title.y = element_blank())
noxlabel = theme(axis.text.x = element_blank())
noylabel = theme(axis.text.y = element_blank())
nolegtitle = theme(legend.title = element_blank())
In [198]:
# Some colors
col.subtypes = c("#EE9937","#67a9cf","#3690c0","#02818a")
col.status = c(col.subtypes[1],"#1d91c0")
col.ipssr = c("#238b45","#66c2a4","#807dba","#fdbb84","#ef6548")
col.who = c("#fcbba1","#d8b365","#99d8c9","#66c2a4","#41ae76","#43a2ca","#9e9ac8","#d4b9da","#c994c7","#df65b0","#fdd0a2","#fdae6b","#bdbdbd","#969696")
mypal.chr = c("#b24745ff","#dF8f44ff","#6A6599FF","#00a1d5ff","#374E55FF")
pal.wt.mut = c("darkgrey","#756bb1")
In [199]:
# Pval code function
pval_to_signif_code <- function (p_vals) 
{
    return(ifelse(p_vals < 0.0001, "****", ifelse(p_vals < 0.001, 
        "***", ifelse(p_vals < 0.01, "**", ifelse(p_vals < 0.05, "*", "ns")))))
}
pval_to_signif_code2 <- function (p_vals) 
{
    return(ifelse(p_vals < 0.0001, "<0.0001 ****", ifelse(p_vals < 0.001, 
        "<0.001 ***", ifelse(p_vals < 0.01, "<0.01 **", ifelse(p_vals < 0.05, "<0.05 *", "ns")))))
}

Below we compute outcome vectors and function for competing risk analysis

In [200]:
# Competing riks
dd$comp_sample_years = pmin(dd$os_sample_years, dd$aml_sample_years)
dd$comp_status = "censor"
dd$comp_status[dd$os_status==1] = "death"
dd$comp_status[dd$aml_status==1] = "aml"
dd$comp_status = factor(dd$comp_status, levels=c("censor","aml","death"))
table(dd$comp_status)

gg_competingrisks.cuminc <- function(fit, gnames = NULL, gsep=" ",
                                    coef = 1.96, conf.int = FALSE, line.size=1, group.levels=NULL) {
    
  # -- fit is from cuminc --  #
  if (!is.null(fit$Tests))
    fit <- fit[names(fit) != "Tests"]
  fit2 <- lapply(fit, `[`, 1:3)
  if (is.null(gnames)) gnames <- names(fit2)
  fit2_list <- lapply(seq_along(gnames), function(ind) {
    df <- as.data.frame(fit2[[ind]])
    df$name <- gnames[ind]
    df
  })
  time <- est <- event <- group <- NULL
  df <- do.call(rbind, fit2_list)
  df$event <- sapply(strsplit(df$name, split=gsep), `[`, 2)
  df$group <- sapply(strsplit(df$name, split=gsep), `[`, 1)
  df$std <- std <- sqrt(df$var)
  if (!is.na(group.levels)) {
      df$group = factor(df$group, levels=group.levels)
  }

  df <- df[df$event=="aml",]

  pl <-  ggplot(df, aes(time, est, color=group)) + geom_line(size=line.size) + ylab("Cumulative incidence of AMLt") + xlab("Years") + theme_survminer()

  if (conf.int) {
    pl <- pl + geom_ribbon(aes(ymin = est - coef*std, ymax=est + coef*std, fill = event), alpha = 0.2, linetype=0)
  }

  return(pl)
}
censor    aml  death 
  1661    562   1101 

Compute TP53 subgroups and allelic statesΒΆ

We integrate mutations and allelic imbalances at the TP53 locus to define TP53 subgroups.

The subgroups are

    1. mono-allelic TP53 per single gene mutation (1mut)
    1. multiple TP53 mutations without allelic imbalances at the TP53 locus (>1mut)
    1. mutation(s) and concomitant deletion at the TP53 locus (mut+del)
    1. mutation(s) and concomitant cnLOH at the TP53 locus (mut+cnloh)

cnLOH: copy-neutral loss of heterozygosity

We refer to subgroup 1. as mono-allelic TP53 and to subgroups 2-4 as bi-allelic or multi-hit.

In [264]:
# Status of chr17 at the TP53 locus
table(dd$chr17,exclude=F)
 cnloh    del   gain iso17q     WT 
    80     97      1     10   3136 
In [265]:
# Add TP53 mutations
# Number of mutations:
dd$TP53mut = sapply(dd$LEUKID, function(ll) sum(mm$TARGET_NAME==ll))
In [266]:
# Simply WT or MUT:
dd$tp53 = "WT"
dd$tp53[dd$TP53mut>0] = "MUT"
# TP53 WT or 1mut or >1mut
dd$TP53 = "WT"
dd$TP53[dd$TP53mut==1] = "1mut"
dd$TP53[dd$TP53mut>1] = ">1mut"
dd$TP53 = factor(dd$TP53, levels=c("WT","1mut",">1mut"))
# Label
dd$TP53label = "WT"
dd$TP53label[dd$TP53mut!=0] = paste0(dd$TP53mut[dd$TP53mut!=0],"mut")
In [267]:
paste("TP53 mutation status:")
table(dd$TP53mut)
paste("TP53 mutation status & chr17:")
table(dd$TP53mut, dd$chr17)
'TP53 mutation status:'
   0    1    2    3 
2946  274  100    4 
'TP53 mutation status & chr17:'
   
    cnloh  del gain iso17q   WT
  0     2   12    0     10 2922
  1    73   76    1      0  124
  2     4    7    0      0   89
  3     1    2    0      0    1
In [273]:
# Compute TP53 subgroups and allelic states
# subgroups:
dd$detail_status = as.vector(dd$TP53)
dd$detail_status[dd$TP53mut==0 & dd$chr17!="WT"] = NA
dd$detail_status[dd$TP53mut>0 & dd$chr17=="cnloh"] = "mut+cnloh"
dd$detail_status[dd$TP53mut>0 & dd$chr17=="del"] = "mut+del"
dd$detail_status = factor(dd$detail_status,levels=c("WT","1mut",">1mut","mut+del","mut+cnloh"))
# allelic states:
dd$allelic_status = as.vector(dd$detail_status)
dd$allelic_status[dd$detail_status%in%c(">1mut","mut+del","mut+cnloh")] = "multi"
dd$allelic_status = factor(dd$allelic_status, levels=c("WT","1mut","multi"))
In [274]:
paste("Main TP53 subgroups:")
table(dd$detail_status,exclude=F)
'Main TP53 subgroups:'
       WT      1mut     >1mut   mut+del mut+cnloh      <NA> 
     2922       125        90        85        78        24 

Note that the 24 NA corresponds to the 12, 2 and 10 cases with deletion, cnloh and iso17q respectively but without any gene mutation.

In [276]:
paste("With allelic states:")
table(dd$detail_status,dd$allelic_status,exclude=F)
'With allelic states:'
           
              WT 1mut multi <NA>
  WT        2922    0     0    0
  1mut         0  125     0    0
  >1mut        0    0    90    0
  mut+del      0    0    85    0
  mut+cnloh    0    0    78    0
  <NA>         0    0     0   24
In [277]:
aa = dd[which(dd$detail_status!="WT"),]
aa$detail_status = factor(aa$detail_status, levels=c("1mut",">1mut","mut+del","mut+cnloh"))
aa$allelic_status = factor(aa$allelic_status, levels=c("1mut","multi"))
In [278]:
im = match(mm$TARGET_NAME,dd$LEUKID)
mm$detail_status = as.vector(dd[im,"detail_status"])
mm$detail_status = factor(mm$detail_status, levels=c("1mut",">1mut","mut+del","mut+cnloh"))
mm$allelic_status = as.vector(dd[im,"allelic_status"])
mm$allelic_status = factor(mm$allelic_status, levels=c("1mut","multi"))

Metrics CalculationΒΆ

We calculate the total number of deletions, gains or rearrangements on different chromosomes than 17.

In [279]:
dd$num_deletion_no17 = apply(ddcyto[,colnames(ddcyto)[grepl("del",colnames(ddcyto)) & !grepl("17",colnames(ddcyto))]],1,sum)
dd$num_gain_no17 = apply(ddcyto[,colnames(ddcyto)[grepl("plus",colnames(ddcyto)) & !grepl("17",colnames(ddcyto))]],1,sum)
dd$num_rearr_no17 = apply(ddcyto[,colnames(ddcyto)[grepl("r_",colnames(ddcyto)) & !grepl("17",colnames(ddcyto))]],1,sum)

We also calculate the number of unique chromosomes other than 17 with aberrations.

In [280]:
ddcyto.reduce.no17 = ddcyto[,!grepl("17",colnames(ddcyto))]
# list of vectors of events:
lresno17 = apply(ddcyto.reduce.no17, 1,  function(x) {res=NA; ix=which(x==1); if(length(ix)>0){ res=colnames(ddcyto.reduce.no17)[x==1]}; return(res)})
# Number of unique chr different than 17 with aberrations:
chr_from_events <- function(vector.events) {
    # extract the chromosomes involved in vector.events
    # return NA if NK or events without chr specified (eg mar, WGA ...)
    vector.chr = NA
    if (any(!is.na(vector.events))) {
        vector.chr.list = lapply(vector.events, function(x) {
                                     y = strsplit(x,split="_")[[1]]
                                     z = str_extract(y,"[0-9]+")
                                     return(z) }
        )
        vector.chr = unique(unlist(vector.chr.list))
        if (any(!is.na(vector.chr))) {
            vector.chr = unique(sort(vector.chr))
        }
    }
    return(vector.chr)
}
ngs_vector_event <- function(ngs.string) {
    if (is.na(ngs.string)) {
        return(NA)
    } else {
        return(sort(unique(strsplit(ngs.string,split=",")[[1]])))
    }
}
dd$NUM_ALL_CHR_NO17 = sapply( lresno17, function(x) { y=unique(chr_from_events(x)); resl=0; if(!identical(NA,y)){resl=length(y)};return(resl) } )
In [281]:
for(mygroup in levels(dd$detail_status)) {
    print(paste0("Summary distribution of number of chr. with aberrations for ",mygroup,":"))
    print(summary(dd$NUM_ALL_CHR_NO17[dd$detail_status==mygroup]))
    cat("\n")
}
[1] "Summary distribution of number of chr. with aberrations for WT:"
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
 0.0000  0.0000  0.0000  0.6057  1.0000  7.0000      24 

[1] "Summary distribution of number of chr. with aberrations for 1mut:"
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  0.000   0.000   1.000   1.256   2.000  10.000      24 

[1] "Summary distribution of number of chr. with aberrations for >1mut:"
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  0.000   3.250   5.000   5.533   7.000  16.000      24 

[1] "Summary distribution of number of chr. with aberrations for mut+del:"
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  1.000   4.000   6.000   6.035   8.000  14.000      24 

[1] "Summary distribution of number of chr. with aberrations for mut+cnloh:"
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  1.000   5.000   6.000   6.321   8.000  13.000      24 

Main Figure 1ΒΆ

Integration of mutations and allelic imbalances at TP53 locusΒΆ

In [282]:
options(repr.plot.width=12, repr.plot.height=8)
hhh = dd[-which(dd$chr17=="WT" & dd$tp53=="WT"),]
hhh$chr17[hhh$chr17=="WT"] = "normal"
hhh$chr17[hhh$iso17q==1] = "iso17q"
hhh$chr17 = factor(hhh$chr17, levels=c("cnloh","del","iso17q","gain","normal"))
hhhlabel = as.data.frame(table(hhh$TP53mut))
hhhlabel$label = paste0("N=",hhhlabel$Freq)
gchrgene = ggplot(hhh) + geom_bar(aes(x=factor(TP53mut),fill=chr17)) + themePP +
ylab(paste0("Number of patients","\n","(from patients with any hit on TP53 locus)")) + xlab("\nNumber of TP53 mutations") +
scale_fill_manual(values=mypal.chr,name="Chr17 at the TP53 locus") +
guides(fill=guide_legend(ncol=3)) + 
theme(legend.position = c(0.50, 0.91),legend.justification = "left") +
scale_y_continuous(breaks=c(0,100,200,300),limits=c(0,300)) + 
geom_text(data=hhhlabel,aes(x=Var1,y=Freq,label=label),vjust=-0.5,angle=0,hjust=0.5,size=9,color="black")

gchrgene

Prevalence of TP53 subgroupsΒΆ

In [283]:
bb = dd[dd$detail_status != "WT",]
bb$detail_status = factor(bb$detail_status,levels=names(table(bb$detail_status))[-1])
bfreq = as.data.frame(table(bb$detail_status))
colnames(bfreq) = c("type","count")
bfreq$freq = 100*bfreq$count/sum(bfreq$count)
bfreq$label1 = paste0("N=",bfreq$count)
In [284]:
options(repr.plot.width=8, repr.plot.height=6)
bfreq$type = factor(bfreq$type, levels=rev(levels(bfreq$type)))

gg.preval.2 = ggplot(bfreq,aes(x=type,y=freq, fill=type)) + geom_bar(stat="identity") + 
geom_text(aes(label=label1),colour="black",vjust=0,angle=0,hjust=-0.1,size=9) + 
themePP + 
xlab("TP53 subgroup") + ylab("\n % of TP53 mutated patients") + ylim(0,40) + scale_fill_manual(values=rev(col.subtypes)) + 
scale_colour_manual(values=rev(col.subtypes)) + noleg + 
coord_flip()

gg.preval.2

VAF per TP53 subgroupΒΆ

VAF: variant allele frequency

In [285]:
options(repr.plot.width=8, repr.plot.height=7)
gg.vaf = ggplot(mm,aes(x=100*VAF,fill=detail_status,color=detail_status))+geom_density() + 
themePP +
scale_fill_manual(values=col.subtypes) + scale_color_manual(values=col.subtypes) + 
facet_grid(detail_status~.,scales="free_y") + noleg + scale_y_continuous(position="left") + 
ylab("Density estimation") + xlab("\n VAF of TP53 mutations") + 
theme(axis.text.y=element_blank(), axis.ticks.y=element_blank())

gg.vaf
In [286]:
for(mygroup in levels(dd$detail_status)[-1]) {
    print(paste0("Summary distribution of TP53 mutations VAF for ",mygroup,":"))
    print(summary(mm$VAF[mm$detail_status==mygroup]))
    cat("\n")
}
[1] "Summary distribution of TP53 mutations VAF for 1mut:"
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0210  0.0510  0.1290  0.1776  0.2670  0.5290 

[1] "Summary distribution of TP53 mutations VAF for >1mut:"
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0200  0.1610  0.3220  0.2862  0.3980  0.5160 

[1] "Summary distribution of TP53 mutations VAF for mut+del:"
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0200  0.2407  0.4100  0.4567  0.6983  0.9180 

[1] "Summary distribution of TP53 mutations VAF for mut+cnloh:"
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0220  0.4284  0.7105  0.6303  0.8505  0.9760 

Main Figure 2ΒΆ

Genome instability per TP53 subgroupΒΆ

In [287]:
options(repr.plot.width=8, repr.plot.height=6)
gg.num.all  =  ggplot(dd[which(dd$detail_status!="WT"),], aes(x=detail_status,y=NUM_ALL_CHR_NO17)) +
geom_pointrange(data=dd[which(dd$detail_status!="WT"),],mapping=aes(x=detail_status,y=NUM_ALL_CHR_NO17,color=detail_status),
                stat = "summary", position_dodge(width=0.4),
                size = 3,
                fun.min = function(z) {quantile(z,0.25)},
                fun.max = function(z) {quantile(z,0.75)},
                fun = median) +
themePP + 
xlab("\nTP53 subgroup") + ylab("No. of unique chr. other than 17 \n with aberration per patient") +
scale_color_manual(values=col.subtypes,guide="none") + scale_y_continuous(breaks=c(0,2,4,6,8)) + 
stat_compare_means(label="p.signif",ref.group="1mut",label.y.npc=0.51,method="wilcox.test",size=6) + 
stat_summary(geom="text", fun=median,
               aes(label=sprintf("%1.0f", ..y..), color=detail_status),
               position=position_nudge(x=-0.18), size=9)

gg.num.all

Deletions, gains and rearrangements per TP53 subgroupΒΆ

In [288]:
bb = melt(data=dd[dd$detail_status%in%c("1mut",">1mut","mut+del","mut+cnloh"),], id.vars=c("LEUKID","detail_status","allelic_status"), measure.vars = c("num_deletion_no17", "num_gain_no17","num_rearr_no17"))
bb$variable = gsub("num_deletion_no17","del",bb$variable)
bb$variable = gsub("num_gain_no17","gain",bb$variable)
bb$variable = gsub("num_rearr_no17","rearr",bb$variable)
colnames(bb) = c("LEUKID","detail_status","allelic_status","anomaly","number")
bb$anomaly = factor(bb$anomaly, levels=c("rearr","gain","del"))
In [289]:
options(repr.plot.width=15, repr.plot.height=5)

cm = compare_means(number ~ detail_status, data=bb, method = "wilcox.test", paired = FALSE,
                  group.by = "anomaly",ref.group="1mut")
cm$detail_status = cm$group2
cm$detail_status = factor(cm$detail_status, levels=levels(bb$detail_status))

gg.num.aberr = ggplot(bb,aes(x=anomaly,y=number)) + geom_boxplot(outlier.shape=NA,aes(fill=detail_status)) + 
facet_grid(.~detail_status) +
themePP + 
noleg + scale_fill_manual(values=col.subtypes) + scale_color_manual(values=col.subtypes) + ylab("Number per patient") + 
scale_y_continuous(breaks=c(0,5,10),limits=c(0,10.2)) +
xlab("\nAberrations on different chromosomes than 17") +
geom_text(data=cm,aes(x=anomaly,y=10.2,label=p.signif),size=6)

gg.num.aberr
Warning message:
β€œRemoved 1 rows containing non-finite values (stat_boxplot).”

Heatmap of chromosomal alterations per TP53 subrgoup (Extended Data)ΒΆ

We display below a heatmap of chromosomal alterations across TP53 subgroups.

Patients are in columns, ordered by their TP53 subgroup: 1. mono-allelic 2. multi-hit per multiple mutations 3. multi-hit per mutation(s) + deletion 4. multi-hit per mutation(s) + cnloh.

Alterations are in rows. We display any alterations found in >=2 % of patients from the mono-allelic state or the multi-hit state (i.e >1mut, mut+del, mut+cnloh).

Alterations are ordered as:

  • presence of complex karyottype
  • marker chromosome mar
  • deletions
  • gains
  • rearrangements
  • cnloh
  • whole genome amplification WGA
  • presence of a ring chromosomes ring
In [311]:
tbi = sort(apply(ddcyto[which(dd$allelic_status=="multi"),colnames(ddcyto)[-1]],2,sum))
tmono = sort(apply(ddcyto[which(dd$allelic_status=="1mut"),colnames(ddcyto)[-1]],2,sum))
In [314]:
# We consider for the heatmap,
# the chromosomal alterations observed in >=2 % of patients from the TP53 mono-allelic or mutli-hit cases
namesbi = rev(names(tbi)[100*tbi/length(which(dd$allelic_status=="multi"))>2])
namesmono = rev(names(tmono)[100*tmono/length(which(dd$allelic_status=="1mut"))>2])
In [317]:
mydelh = c(namesbi[grep("del",namesbi)])
mygainh = c(namesbi[grep("plus",namesbi)], "plus12")
myrh = c(namesbi[grep("r_",namesbi)])
mylohh = c(namesbi[grep("cnloh",namesbi)],"cnloh9p")
myaberr = c("complex","mar",mydelh,mygainh,myrh,mylohh,"WGA","ring")
In [318]:
tmp = dd[which(dd$allelic_status%in%c("1mut","multi")),]
tmp$myck = tmp$complex
tmp$complex = 0
tmp$complex[tmp$myck=="complex"] = 1
tmp$detail_status = factor(tmp$detail_status, levels=c("1mut",">1mut","mut+del","mut+cnloh"))
tmp$mygroup = tmp$detail_status # not smart
tmp = cbind( tmp[,"complex",drop=F] , ddcyto[which(dd$allelic_status%in%c("1mut","multi")),myaberr[-1]] , tmp[,c("detail_status","mygroup","LEUKID")] )
In [320]:
# Merge del5q and del5
# Merge del17p and del17
# Merge cnloh17p and cnloh17
# For better visualization
tmp$del5q[tmp$del5==1] = 1
tmp$del5 = NULL
colnames(tmp)[colnames(tmp)=="del5q"] = "del5q/5"
myaberr = myaberr[-which(myaberr=="del5")]
myaberr[myaberr=="del5q"] = "del5q/5"

tmp$del17p[which(tmp$del17==1)] = 1
tmp$del17 = NULL
colnames(tmp)[colnames(tmp)=="del17p"] = "del17p/17"
myaberr = myaberr[-which(myaberr=="del17")]
myaberr[myaberr=="del17p"] = "del17p/17"

tmp$cnloh17p[which(tmp$cnloh17==1)] = 1
tmp$cnloh17 = NULL
colnames(tmp)[colnames(tmp)=="cnloh17p"] = "cnloh17p/17"
myaberr = myaberr[-which(myaberr=="cnloh17")]
myaberr[myaberr=="cnloh17p"] = "cnloh17p/17"
In [321]:
# Cluster patients per TP53 subgroups based on molecular vectors
listpatientlevel2  = lapply(levels(tmp$mygroup), function(x) {
    ww = tmp$LEUKID[which(tmp$mygroup==x)]
    fmp = tmp[which(tmp$mygroup==x),]
    # cluster patients within group
    hdist = dist(fmp[,myaberr],method="binary")
    hindex = rev(hclust(hdist, method = "ward.D", members = NULL)$order)
    return( ww[hindex] ) 
})
long_dd_clustered = melt(tmp, id=c("LEUKID","mygroup"),measure.vars = myaberr)
colnames(long_dd_clustered)[1] = "Patient"
long_dd_clustered$Patient = factor(long_dd_clustered$Patient, levels=unlist(listpatientlevel2))
long_dd_clustered$variable = factor(long_dd_clustered$variable, levels=rev(myaberr))
row_sep = c(2,length(mydelh)-2,length(mygainh),length(myrh),length(mylohh)-1,2)
row_sep_cum = c(cumsum(row_sep))
row_sep_cum = row_sep_cum[-length(row_sep_cum)]
row_sep_cum = length(myaberr) - row_sep_cum
row_col = as.vector(sapply(levels(tmp$mygroup), function(x) sum(tmp$mygroup==x)))
row_col_cum = c(cumsum(row_col))
row_col_cum = row_col_cum[-length(row_col_cum)] 
uptext = data.frame(x=c(cumsum(row_col)-(row_col/2)), y=rep(length(myaberr)+3,), label=levels(tmp$mygroup))
In [322]:
# Plot the heatmap
options(repr.plot.width=12, repr.plot.height=17)
ggheat2 = ggplot(long_dd_clustered, aes(Patient,variable,fill=)) +

    # geom raster heatmap
    geom_tile(aes(fill = factor(value)), show.legend = FALSE) +
    scale_fill_manual(values = c('0' = 'lightgrey', '1' = '#8c6bb1')) +

    # axis
    theme(axis.text.x = element_blank(), 
          axis.ticks.x = element_blank()) + 

    # column sand row separation  (+ 0.5 to be exactly at the good location)
    geom_vline(xintercept = row_col_cum[1] + 0.5, col = col.subtypes[1], linetype = 2, size = 1.1) + 
    geom_vline(xintercept = row_col_cum[2] + 1.0, col = col.subtypes[3], linetype = 2, size = 1.1) + 
    geom_vline(xintercept = row_col_cum[3] + 1.0, col = col.subtypes[4], linetype = 2, size = 1.1) + 
    geom_hline(yintercept = row_sep_cum + 0.5, col = 'white', linetype = 2, size = 1.1) +

    # 3d effect by adding white and grey horizontal line
    geom_hline(yintercept = seq(1, length(myaberr), 2) + 0.5, col = 'white', linetype = 1, size = 0.1) +
    geom_hline(yintercept = seq(0, length(myaberr), 2) + 0.5, col = 'darkgrey', linetype = 1, size = 0.1) +

    ylab("Aberration") + 

    geom_tile(aes(x=Patient,y=length(myaberr)+1.2,color=mygroup),alpha=.7,size=3) + 
    scale_color_manual(values=c(col.subtypes)) + 
    noleg + 
    geom_text(data=uptext, mapping=aes(x=x,y=78.1,label=label),size=5,color="white",fontface="bold")

#options(repr.plot.res = 300) # Better resolution for that plot
ggheat2

Main Figure 3ΒΆ

Blood Counts and Blasts per TP53 allelic stateΒΆ

Blood counts (Hb Plt ANC) and bone marrow blasts per TP53 allelic state

In [454]:
options(repr.plot.width=8, repr.plot.height=5)

ggHB = ggplot(dd[!is.na(dd$allelic_status),],aes(x=allelic_status,y=HB,fill=allelic_status)) + 
geom_boxplot(outlier.shape = NA,width=0.65) + 
themePP + xlab("TP53 state") + ylab("Hemoglobin (g/dL)") + noleg +
stat_summary(geom="text", fun=median,
               aes(label=sprintf("%1.0f", ..y..)),color="black",
               position=position_nudge(x=-0.47), size=8) + 
scale_fill_manual(values=c("#737373",col.status)) +
stat_compare_means(label="p.signif",comparisons=list(c(2,3)),tip.length=0,method="wilcox.test",size=6,label.y=c(13)) +
scale_y_continuous(breaks=c(8,12,16)) + 
coord_trans(y = "sqrt",ylim = c(4.5,16.5))

ggPLT = ggplot(dd[!is.na(dd$allelic_status),],aes(x=allelic_status,y=PLT,fill=allelic_status)) + 
geom_boxplot(outlier.shape=NA,width=0.65) + 
themePP + xlab("TP53 state") + ylab("Platelets (Giga/L)") + noleg +
stat_summary(geom="text", fun=median,
               aes(label=sprintf("%1.0f", ..y..)),color="black",
               position=position_nudge(x=-0.51), size=8) + 
scale_fill_manual(values=c("#737373",col.status)) +
stat_compare_means(label="p.signif",comparisons=list(c(2,3)),tip.length=0,method="wilcox.test",size=6,label.y=c(600)) + 
scale_y_continuous(breaks=c(200,400,600)) + 
coord_trans(y = "sqrt",ylim = c(0,700))
                  
ggANC = ggplot(dd[!is.na(dd$allelic_status),],aes(x=allelic_status,y=ANC,fill=allelic_status)) + geom_boxplot(outlier.shape=NA,width=0.65) + 
themePP + xlab("TP53 state") + ylab(" ANC (Giga/L)") + noleg +
stat_summary(geom="text", fun=median,
               aes(label=sprintf("%1.0f", ..y..)),color="black",
               position=position_nudge(x=-0.45), size=8) + 
scale_fill_manual(values=c("#737373",col.status)) +
stat_compare_means(label="p.signif",comparisons=list(c(2,3)),tip.length=0,method="wilcox.test",size=6,label.y=7) +
scale_y_continuous(breaks=c(2,4,6,8)) + 
coord_trans(y = "sqrt",ylim=c(0,8))

ggBM = ggplot(dd[!is.na(dd$allelic_status),],aes(x=allelic_status,y=BM_BLAST,fill=allelic_status)) + geom_boxplot(outlier.shape = NA,width=0.65) + 
themePP + xlab("TP53 state") + ylab(" Bone marrow blasts (%)") + noleg +
stat_summary(geom="text", fun=median,
               aes(label=sprintf("%1.0f", ..y..)),color="black",
               position=position_nudge(x=-0.45), size=8) + 
scale_fill_manual(values=c("#737373",col.status))  + 
stat_compare_means(label="p.signif",comparisons=list(c(2,3)),label.y=32,tip.length=0,method="wilcox.test",size=6) +
scale_y_continuous(breaks=c(10,20,30)) + 
coord_trans(y = "sqrt",ylim = c(0,35))
In [457]:
options(repr.plot.width=18, repr.plot.height=16)

ggarrange(ggHB+theme(plot.margin = unit(c(1.5,1.5,1.5,1.5), "lines")),
                            ggPLT+theme(plot.margin = unit(c(1.5,1.5,1.5,1.5), "lines")),
                            ggANC+theme(plot.margin = unit(c(1.5,1.5,1.5,1.5), "lines")),
                            ggBM+theme(plot.margin = unit(c(1.5,1.5,1.5,1.5), "lines")),
                            nrow=2,ncol=2)
Warning message:
β€œRemoved 111 rows containing non-finite values (stat_boxplot).”
Warning message:
β€œRemoved 111 rows containing non-finite values (stat_summary).”
Warning message:
β€œRemoved 111 rows containing non-finite values (stat_signif).”
Warning message:
β€œRemoved 125 rows containing non-finite values (stat_boxplot).”
Warning message:
β€œRemoved 125 rows containing non-finite values (stat_summary).”
Warning message:
β€œRemoved 125 rows containing non-finite values (stat_signif).”
Warning message:
β€œRemoved 199 rows containing non-finite values (stat_boxplot).”
Warning message:
β€œRemoved 199 rows containing non-finite values (stat_summary).”
Warning message:
β€œRemoved 199 rows containing non-finite values (stat_signif).”
Warning message:
β€œRemoved 108 rows containing non-finite values (stat_boxplot).”
Warning message:
β€œRemoved 108 rows containing non-finite values (stat_summary).”
Warning message:
β€œRemoved 108 rows containing non-finite values (stat_signif).”

OS per TP53 allelic stateΒΆ

OS: overall survival

In [323]:
options(repr.plot.width=13, repr.plot.height=8)
ff = as.formula(paste("Surv(os_sample_years,os_status)~","allelic_status"))
kmfit = survfit(ff,data=dd)
kmfit$call$formula <- ff
myleg = paste0(gsub("allelic_status=","",names(kmfit$strata))," (N=",kmfit$n,")")

ggs = ggsurvplot(kmfit, data=dd, legend.title="",legend.labs=myleg, conf.int=F, size=2.8, xlim=c(0,15),break.time.by=5)$plot +
scale_color_manual(values=c("#737373",col.status)) + xlab("Years")

ptOS = pairwise_survdiff(formula=ff, data=dd, p.adjust.method = "none")

ggOS = ggs + themePP + theme(legend.key.width = unit(1.5,"cm")) +
annotate("segment",color=col.status[1],x=4,xend=5.5,y=1,yend=1,size=1.8) + 
annotate("text",label="vs",y=1,x=6.1,size=10) +
annotate("segment",color=col.status[2],x=6.7,xend=8.2,y=1,yend=1,size=1.8) +
annotate("text",label=paste("p","=",signif(ptOS$p.value[2,2],1)," by Log-Rank test"),x=12,y=1,size=10) +
ylab("Probability of overall survival") + topleg

ggOS
In [324]:
# Pairwise Log-rank tests:
ptOS$p.value
A matrix: 2 Γ— 2 of type dbl
WT1mut
1mut 1.645970e-01 NA
multi2.045641e-1084.512274e-18
In [325]:
# Hazard Ratio multi vs. 1mut:
summary( coxph(ff,data=dd[dd$allelic_status%in%c("1mut","multi"),]) )
Call:
coxph(formula = ff, data = dd[dd$allelic_status %in% c("1mut", 
    "multi"), ])

  n= 368, number of events= 263 
   (10 observations deleted due to missingness)

                       coef exp(coef) se(coef)      z Pr(>|z|)    
allelic_status1mut  -1.2972    0.2733   0.1578 -8.222   <2e-16 ***
allelic_statusmulti      NA        NA   0.0000     NA       NA    
---
Signif. codes:  0 β€˜***’ 0.001 β€˜**’ 0.01 β€˜*’ 0.05 β€˜.’ 0.1 β€˜ ’ 1

                    exp(coef) exp(-coef) lower .95 upper .95
allelic_status1mut     0.2733      3.659    0.2006    0.3723
allelic_statusmulti        NA         NA        NA        NA

Concordance= 0.627  (se = 0.015 )
Likelihood ratio test= 80.7  on 1 df,   p=<2e-16
Wald test            = 67.59  on 1 df,   p=<2e-16
Score (logrank) test = 75.04  on 1 df,   p=<2e-16

AMLt per TP53 allelic stateΒΆ

AMLt: AML transformation

In [326]:
options(repr.plot.width=13, repr.plot.height=8)
tmp = dd
tmp = tmp[ !grepl("AML",tmp$WHO_2016) , ] # EXCLUDE AML PATIENTS for AMLt analysis
cmfit = with(tmp,cuminc(comp_sample_years,comp_status,allelic_status,cencode="censor"))
tlg = table(tmp$allelic_status[!is.na(tmp$comp_sample_years)])
myleg = paste0(" ", names(tlg)," (N=",tlg,")  ")
cmfit.test = with(tmp[tmp$allelic_status!="WT",],cuminc(comp_sample_years,comp_status,allelic_status,cencode="censor"))
ptAMLt = cmfit.test$Tests["aml","pv"]

ggComp = gg_competingrisks.cuminc(cmfit,line.size=2.8,group.levels=c("WT","1mut","multi")) +
themePP + topleg + xlim(c(0,15)) + theme(legend.key.width = unit(1.5,"cm")) +
scale_color_manual(values=c("#737373",col.status), labels=myleg) + nolegtitle +
annotate("segment",color=col.status[1],x=5,xend=6.5,y=0.6,yend=0.6,size=1.8) +
annotate("text",label="vs",y=0.6,x=7.1,size=10) +
annotate("segment",color=col.status[2],x=7.7,xend=9.2,y=0.6,yend=0.6,size=1.8) +
annotate("text",label=paste("p","=",signif(ptAMLt,2)," by Gray's test"),x=12.5,y=0.6,size=10)

ggComp
297 cases omitted due to missing values
37 cases omitted due to missing values
Warning message in if (!is.na(group.levels)) {:
β€œthe condition has length > 1 and only the first element will be used”
Warning message:
β€œRemoved 2 row(s) containing missing values (geom_path).”

Cox model for OSΒΆ

Multivariate Cox model of OS with state-of-the-art IPSS-R variables and TP53 allelic state.

In [327]:
# Complete cases
i.complete.clinical = !is.na(dd$AGE)&!is.na(dd$HB)&!is.na(dd$PLT)&!is.na(dd$ANC)&!is.na(dd$BM_BLAST)&!is.na(dd$CYTO_IPSSR)
i.complete.os = i.complete.clinical & !is.na(dd$os_sample_years) & !is.na(dd$os_status)
i.complete.aml = i.complete.clinical & !is.na(dd$aml_sample_years) & !is.na(dd$aml_status)
In [328]:
dd$CYTOGENETIC_IPSSR = dd$CYTO_IPSSR
dd$CYTOGENETIC_IPSSR = factor(dd$CYTOGENETIC_IPSSR, levels=c("INT","VERY-GOOD","GOOD","POOR","VERY-POOR"))
tmp.os = dd[i.complete.os,]
In [329]:
div_mean_calcul <- function(x) {
    return(x/mean(x))
}
tmp.os$AGEs = tmp.os$AGE/10
tmp.os$BMz = div_mean_calcul(tmp.os$BM_BLAST)
tmp.os$ANCz = div_mean_calcul(tmp.os$ANC)
tmp.os$PLTz = div_mean_calcul(tmp.os$PLT)
tmp.os$HBz = div_mean_calcul(tmp.os$HB)
In [330]:
model.os <- coxph( Surv(os_sample_years, os_status) ~ AGEs + HBz + PLTz + ANCz + BMz + CYTOGENETIC_IPSSR + allelic_status,
                data = tmp.os)
In [331]:
summary(model.os)
Call:
coxph(formula = Surv(os_sample_years, os_status) ~ AGEs + HBz + 
    PLTz + ANCz + BMz + CYTOGENETIC_IPSSR + allelic_status, data = tmp.os)

  n= 2717, number of events= 1290 
   (21 observations deleted due to missingness)

                               coef exp(coef) se(coef)       z Pr(>|z|)    
AGEs                        0.32070   1.37809  0.02636  12.164  < 2e-16 ***
HBz                        -1.79049   0.16688  0.14668 -12.207  < 2e-16 ***
PLTz                       -0.31988   0.72624  0.04125  -7.755 8.81e-15 ***
ANCz                        0.07655   1.07956  0.01130   6.774 1.25e-11 ***
BMz                         0.22834   1.25651  0.01916  11.916  < 2e-16 ***
CYTOGENETIC_IPSSRVERY-GOOD -0.46220   0.62989  0.17860  -2.588 0.009656 ** 
CYTOGENETIC_IPSSRGOOD      -0.33014   0.71882  0.07861  -4.200 2.67e-05 ***
CYTOGENETIC_IPSSRPOOR       0.55958   1.74994  0.12961   4.317 1.58e-05 ***
CYTOGENETIC_IPSSRVERY-POOR  0.51934   1.68092  0.14855   3.496 0.000472 ***
allelic_status1mut         -0.01767   0.98249  0.15609  -0.113 0.909879    
allelic_statusmulti         0.71432   2.04279  0.13129   5.441 5.30e-08 ***
---
Signif. codes:  0 β€˜***’ 0.001 β€˜**’ 0.01 β€˜*’ 0.05 β€˜.’ 0.1 β€˜ ’ 1

                           exp(coef) exp(-coef) lower .95 upper .95
AGEs                          1.3781     0.7256    1.3087    1.4512
HBz                           0.1669     5.9924    0.1252    0.2225
PLTz                          0.7262     1.3770    0.6698    0.7874
ANCz                          1.0796     0.9263    1.0559    1.1037
BMz                           1.2565     0.7959    1.2102    1.3046
CYTOGENETIC_IPSSRVERY-GOOD    0.6299     1.5876    0.4439    0.8939
CYTOGENETIC_IPSSRGOOD         0.7188     1.3912    0.6162    0.8386
CYTOGENETIC_IPSSRPOOR         1.7499     0.5714    1.3574    2.2560
CYTOGENETIC_IPSSRVERY-POOR    1.6809     0.5949    1.2563    2.2490
allelic_status1mut            0.9825     1.0178    0.7235    1.3341
allelic_statusmulti           2.0428     0.4895    1.5793    2.6423

Concordance= 0.735  (se = 0.007 )
Likelihood ratio test= 847.5  on 11 df,   p=<2e-16
Wald test            = 967.6  on 11 df,   p=<2e-16
Score (logrank) test = 1113  on 11 df,   p=<2e-16
In [332]:
x <- summary(model.os)
res.df.os = as.data.frame(cbind(x$coef[,c(2,5)],x$conf.int[,c(3,4)]))
myvariable = c("Age","Hemoglobin","Platelets","ANC","Bone marrow blasts",
                        "Cytogenetic very-good","Cytogenetic good","Cytogenetic poor","Cytogenetic very-poor",
                        "TP53 mono-allelic","TP53 multi-hit"
                       )
res.df.os$variable = factor(myvariable, levels=rev(myvariable))
colnames(res.df.os) = c("HR","pval","HRlow","HRhigh","variable")
res.df.os$annot = sapply(res.df.os$pval, pval_to_signif_code)
res.df.os$type = c("age",rep("clin",4),rep("cyto",4),"mono","multi")
In [333]:
gos1 = ggplot(res.df.os, aes(x=variable,y=HR,colour=type)) + 
geom_point(size=7) + 
geom_hline(yintercept=1,color="darkgrey",size=1.2) + 
geom_hline(yintercept=0.5,color="lightgrey",size=0.6) + 
geom_hline(yintercept=0.2,color="lightgrey",size=0.6) + 
geom_hline(yintercept=2,color="lightgrey",size=0.6) + 
geom_hline(yintercept=3,color="lightgrey",size=0.6) +
geom_errorbar(aes(ymin=HRlow,ymax=HRhigh,colour=type),size=1.5,width=0.6) + 
geom_text(aes(x=variable,y=0.1,label=annot),col="black",size=8) + 
scale_y_log10(breaks=c(0.2,0.5,1,2,3),labels=c(0.2,0.5,1,2,3)) + ##737373
scale_colour_manual(values=c(rep("#374E55FF",3),col.status)) +
themePP + noytitle + ylab("\n Hazard ratio for OS (95% CI)") + coord_flip() +  noleg

gos1

Cause-specific Cox model for AMLtΒΆ

Multivariate cause-specific Cox model of OS with state-of-the-art IPSS-R variables and TP53 allelic state.

In [334]:
# YOU DO NOT INCLUDE AML SAMPLES WHEN EVALUATED PREGRESSION MDS - AML
tmp.aml = dd[i.complete.aml,]
tmp.aml = tmp.aml[ !grepl("AML",tmp.aml$WHO_2016) , ]
tmp.aml$AGEs = tmp.aml$AGE/10
tmp.aml$BMz = div_mean_calcul(tmp.aml$BM_BLAST)
tmp.aml$ANCz = div_mean_calcul(tmp.aml$ANC)
tmp.aml$PLTz = div_mean_calcul(tmp.aml$PLT)
tmp.aml$HBz = div_mean_calcul(tmp.aml$HB)
model.aml <- coxph( Surv(aml_sample_years, aml_status) ~ AGEs + HBz + PLTz + ANCz + BMz + CYTOGENETIC_IPSSR + allelic_status,
                data = tmp.aml)
In [335]:
summary(model.aml)
Call:
coxph(formula = Surv(aml_sample_years, aml_status) ~ AGEs + HBz + 
    PLTz + ANCz + BMz + CYTOGENETIC_IPSSR + allelic_status, data = tmp.aml)

  n= 2464, number of events= 411 
   (15 observations deleted due to missingness)

                                coef exp(coef)  se(coef)      z Pr(>|z|)    
AGEs                        0.001836  1.001838  0.040460  0.045  0.96380    
HBz                        -0.781237  0.457839  0.250822 -3.115  0.00184 ** 
PLTz                       -0.197230  0.821002  0.071590 -2.755  0.00587 ** 
ANCz                        0.070484  1.073028  0.022208  3.174  0.00150 ** 
BMz                         0.683882  1.981555  0.043806 15.612  < 2e-16 ***
CYTOGENETIC_IPSSRVERY-GOOD -0.731622  0.481128  0.376674 -1.942  0.05210 .  
CYTOGENETIC_IPSSRGOOD      -0.264299  0.767744  0.137253 -1.926  0.05415 .  
CYTOGENETIC_IPSSRPOOR       0.187910  1.206725  0.228961  0.821  0.41181    
CYTOGENETIC_IPSSRVERY-POOR -0.044499  0.956476  0.277280 -0.160  0.87250    
allelic_status1mut         -0.098972  0.905768  0.308445 -0.321  0.74831    
allelic_statusmulti         1.079328  2.942701  0.240896  4.480 7.45e-06 ***
---
Signif. codes:  0 β€˜***’ 0.001 β€˜**’ 0.01 β€˜*’ 0.05 β€˜.’ 0.1 β€˜ ’ 1

                           exp(coef) exp(-coef) lower .95 upper .95
AGEs                          1.0018     0.9982    0.9255    1.0845
HBz                           0.4578     2.1842    0.2800    0.7485
PLTz                          0.8210     1.2180    0.7135    0.9447
ANCz                          1.0730     0.9319    1.0273    1.1208
BMz                           1.9816     0.5047    1.8185    2.1592
CYTOGENETIC_IPSSRVERY-GOOD    0.4811     2.0784    0.2300    1.0067
CYTOGENETIC_IPSSRGOOD         0.7677     1.3025    0.5867    1.0047
CYTOGENETIC_IPSSRPOOR         1.2067     0.8287    0.7704    1.8902
CYTOGENETIC_IPSSRVERY-POOR    0.9565     1.0455    0.5555    1.6470
allelic_status1mut            0.9058     1.1040    0.4948    1.6579
allelic_statusmulti           2.9427     0.3398    1.8353    4.7184

Concordance= 0.78  (se = 0.012 )
Likelihood ratio test= 402.8  on 11 df,   p=<2e-16
Wald test            = 461.5  on 11 df,   p=<2e-16
Score (logrank) test = 577.6  on 11 df,   p=<2e-16
In [336]:
x <- summary(model.aml)
res.df.aml = as.data.frame(cbind(x$coef[,c(2,5)],x$conf.int[,c(3,4)]))
res.df.aml$variable = factor(myvariable, levels=rev(myvariable))
colnames(res.df.aml) = c("HR","pval","HRlow","HRhigh","variable")
res.df.aml$annot = sapply(res.df.aml$pval, pval_to_signif_code)
res.df.aml$type = c("age",rep("clin",4),rep("cyto",4),"mono","multi")
In [337]:
gaml1 = ggplot(res.df.aml, aes(x=variable,y=HR,colour=type)) + 
geom_point(size=7) + 
geom_hline(yintercept=1,color="darkgrey",size=1.2) + 
geom_hline(yintercept=0.5,color="lightgrey",size=0.6) + geom_hline(yintercept=2,color="lightgrey",size=0.6) + 
geom_hline(yintercept=3,color="lightgrey",size=0.6) +
geom_errorbar(aes(ymin=HRlow,ymax=HRhigh,colour=type),size=1.5,width=0.6) + 
geom_text(aes(x=variable,y=0.16,label=annot),col="black",size=8) + 
scale_y_log10(breaks=c(0.5,1,2,3),labels=c(0.5,1,2,3)) + ##737373
scale_colour_manual(values=c(rep("#374E55FF",3),col.status)) +
themePP + noytitle + ylab("\n Hazard ratio for AMLt (95% CI)") + coord_flip() +  noleg

gaml1

Alternative: competing risk Fine and Gray model for AMLtΒΆ

An alternative modeling approach to cause-specific Cox model for AML transformation in the competing risk model proposed by Fine and Gray, where death without transformation is modeled as a competing risk.

We show here the results of the Fine and Gray approach for AMLt with state-of-the-art IPSS-R variables and TP53 allelic state.

In [344]:
tmp.aml.crr = tmp.aml[!is.na(tmp.aml$comp_status),]
tmp.aml.crr = tmp.aml.crr[!is.na(tmp.aml.crr$comp_sample_years),]
tmp.aml.crr = tmp.aml.crr[which(tmp.aml.crr$allelic_status%in%c("WT","1mut","multi")),]
tmp.aml.crr.cov = cbind(tmp.aml.crr[,c("AGEs","HBz","PLTz","ANCz","BMz")] , 
                        model.matrix(~0 + CYTOGENETIC_IPSSR, data=tmp.aml.crr)[,-1] , 
                        model.matrix(~0 + allelic_status, data=tmp.aml.crr)[,-1])
colnames(tmp.aml.crr.cov)[10:11] = c("TP53mono","TP53multi")
res.aml.crr = crr(ftime=tmp.aml.crr$comp_sample_years, fstatus=as.vector(tmp.aml.crr$comp_status),
    cov1=tmp.aml.crr.cov,
    failcode="aml",
    cencode="censor"
   )
In [345]:
summary(res.aml.crr)
Competing Risks Regression

Call:
crr(ftime = tmp.aml.crr$comp_sample_years, fstatus = as.vector(tmp.aml.crr$comp_status), 
    cov1 = tmp.aml.crr.cov, failcode = "aml", cencode = "censor")

                               coef exp(coef) se(coef)       z p-value
AGEs                       -0.10377     0.901   0.0367 -2.8279  0.0047
HBz                        -0.12867     0.879   0.2548 -0.5050  0.6100
PLTz                       -0.07271     0.930   0.0718 -1.0125  0.3100
ANCz                        0.04195     1.043   0.0260  1.6144  0.1100
BMz                         0.61759     1.854   0.0449 13.7401  0.0000
CYTOGENETIC_IPSSRVERY-GOOD -0.58174     0.559   0.3718 -1.5648  0.1200
CYTOGENETIC_IPSSRGOOD      -0.21989     0.803   0.1408 -1.5618  0.1200
CYTOGENETIC_IPSSRPOOR      -0.00988     0.990   0.2360 -0.0419  0.9700
CYTOGENETIC_IPSSRVERY-POOR -0.17171     0.842   0.3020 -0.5686  0.5700
TP53mono                   -0.08868     0.915   0.3002 -0.2954  0.7700
TP53multi                   0.68621     1.986   0.2643  2.5966  0.0094

                           exp(coef) exp(-coef)  2.5% 97.5%
AGEs                           0.901      1.109 0.839 0.969
HBz                            0.879      1.137 0.534 1.449
PLTz                           0.930      1.075 0.808 1.070
ANCz                           1.043      0.959 0.991 1.097
BMz                            1.854      0.539 1.698 2.025
CYTOGENETIC_IPSSRVERY-GOOD     0.559      1.789 0.270 1.158
CYTOGENETIC_IPSSRGOOD          0.803      1.246 0.609 1.058
CYTOGENETIC_IPSSRPOOR          0.990      1.010 0.623 1.572
CYTOGENETIC_IPSSRVERY-POOR     0.842      1.187 0.466 1.522
TP53mono                       0.915      1.093 0.508 1.648
TP53multi                      1.986      0.503 1.183 3.334

Num. cases = 2461
Pseudo Log-likelihood = -2886 
Pseudo likelihood ratio test = 276  on 11 df,
In [346]:
xx = summary(res.aml.crr)
res.df.aml.crr = as.data.frame(cbind(xx$coef[,c(2,5)],xx$conf.int[,c(3,4)]))
res.df.aml.crr$variable = factor(myvariable, levels=rev(myvariable))
colnames(res.df.aml.crr) = c("HR","pval","HRlow","HRhigh","variable")
res.df.aml.crr$annot = sapply(res.df.aml.crr$pval, pval_to_signif_code)
res.df.aml.crr$type = c("age",rep("clin",4),rep("cyto",4),"mono","multi")
In [347]:
gaml1.crr = ggplot(res.df.aml.crr, aes(x=variable,y=HR,colour=type)) + 
geom_point(size=7) + 
geom_hline(yintercept=1,color="darkgrey",size=1.2) + 
geom_hline(yintercept=0.5,color="lightgrey",size=0.6) + geom_hline(yintercept=2,color="lightgrey",size=0.6) + 
geom_hline(yintercept=3,color="lightgrey",size=0.6) +
geom_errorbar(aes(ymin=HRlow,ymax=HRhigh,colour=type),size=1.5,width=0.6) + 
geom_text(aes(x=variable,y=0.16,label=annot),col="black",size=8) + 
scale_y_log10(breaks=c(0.5,1,2,3),labels=c(0.5,1,2,3)) + ##737373
scale_colour_manual(values=c(rep("#374E55FF",3),col.status)) +
themePP + noytitle + ylab("\n Hazard ratio for AMLt (95% CI) (Fine and Gray model)") + coord_flip() +  noleg
gaml1.crr

Main Figure 4ΒΆ

In [348]:
tmp = dd[which(dd$MDS_TYPE!="secondary"),] # too few number for secondary mds for reliable analysis
tmp$mds_type = "de-novo"
tmp$mds_type[tmp$MDS_TYPE%in%c("therapy_related")] = "therapy-related"
table(tmp$mds_type,tmp$allelic_status)
                 
                    WT 1mut multi
  de-novo         2552  101   184
  therapy-related  162   10    52
In [349]:
options(repr.plot.width=15, repr.plot.height=9)

ff = as.formula(paste("Surv(os_sample_years,os_status)~","allelic_status + mds_type"))
kmfit = survfit(ff,data=tmp)
kmfit$call$formula <- ff

gg.therapy.OS = ggsurvplot(kmfit, data=tmp,legend.title=c("TP53 state"),
                                conf.int=F, size=2.0, xlim=c(0,15),
                                linetype="mds_type",color="allelic_status",breaks=5,
                                font.x=30, font.y=30, font.tickslab=30, font.legend=30
                               )$plot +
theme(legend.key.width = unit(2.8,"cm")) + theme(legend.box = "vertical",legend.position = c(0.85,0.79)) + 
scale_color_manual(values=c("#737373",col.status)) + ylab("Probability of overall survival") + xlab("Years") + 
scale_linetype_manual(values=c("solid","dashed","dotted"),name="Type of MDS")

gg.therapy.OS
In [350]:
# Pairwise Log-rank test p-values:
pt.therapy.os = pairwise_survdiff(formula=ff, data=tmp, p.adjust.method = "none")
pt.therapy.os$p.value
A matrix: 5 Γ— 5 of type dbl
allelic_status=WT, mds_type=de-novo allelic_status=WT, mds_type=therapy-relatedallelic_status=1mut, mds_type=de-novo allelic_status=1mut, mds_type=therapy-relatedallelic_status=multi, mds_type=de-novo
allelic_status=WT, mds_type=therapy-related2.924188e-04 NA NA NA NA
allelic_status=1mut, mds_type=de-novo 6.074953e-011.086055e-01 NA NA NA
allelic_status=1mut, mds_type=therapy-related2.127219e-014.564242e-012.895056e-01 NA NA
allelic_status=multi, mds_type=de-novo 1.612498e-844.057595e-186.408046e-180.03421598 NA
allelic_status=multi, mds_type=therapy-related1.208915e-305.234532e-111.055195e-120.043403650.8802042
In [351]:
# Hazard Ratio multi vs. 1mut in t-MDS setting:
summary( coxph(as.formula(paste("Surv(os_sample_years,os_status)~","allelic_status")),data=tmp[tmp$allelic_status%in%c("1mut","multi") & tmp$mds_type=="therapy-related",]) )
Call:
coxph(formula = as.formula(paste("Surv(os_sample_years,os_status)~", 
    "allelic_status")), data = tmp[tmp$allelic_status %in% c("1mut", 
    "multi") & tmp$mds_type == "therapy-related", ])

  n= 62, number of events= 49 

                       coef exp(coef) se(coef)      z Pr(>|z|)  
allelic_status1mut  -0.9394    0.3909   0.4828 -1.946   0.0517 .
allelic_statusmulti      NA        NA   0.0000     NA       NA  
---
Signif. codes:  0 β€˜***’ 0.001 β€˜**’ 0.01 β€˜*’ 0.05 β€˜.’ 0.1 β€˜ ’ 1

                    exp(coef) exp(-coef) lower .95 upper .95
allelic_status1mut     0.3909      2.558    0.1517     1.007
allelic_statusmulti        NA         NA        NA        NA

Concordance= 0.534  (se = 0.035 )
Likelihood ratio test= 4.74  on 1 df,   p=0.03
Wald test            = 3.79  on 1 df,   p=0.05
Score (logrank) test = 4.04  on 1 df,   p=0.04
In [352]:
# Hazard Ratio multi vs. WT in t-MDS setting:
summary( coxph(as.formula(paste("Surv(os_sample_years,os_status)~","allelic_status")),data=tmp[tmp$allelic_status%in%c("WT","multi") & tmp$mds_type=="therapy-related",]) )
Call:
coxph(formula = as.formula(paste("Surv(os_sample_years,os_status)~", 
    "allelic_status")), data = tmp[tmp$allelic_status %in% c("WT", 
    "multi") & tmp$mds_type == "therapy-related", ])

  n= 214, number of events= 136 

                      coef exp(coef) se(coef)     z Pr(>|z|)    
allelic_status1mut      NA        NA   0.0000    NA       NA    
allelic_statusmulti 1.2084    3.3480   0.1945 6.214 5.17e-10 ***
---
Signif. codes:  0 β€˜***’ 0.001 β€˜**’ 0.01 β€˜*’ 0.05 β€˜.’ 0.1 β€˜ ’ 1

                    exp(coef) exp(-coef) lower .95 upper .95
allelic_status1mut         NA         NA        NA        NA
allelic_statusmulti     3.348     0.2987     2.287     4.901

Concordance= 0.615  (se = 0.02 )
Likelihood ratio test= 33.09  on 1 df,   p=9e-09
Wald test            = 38.61  on 1 df,   p=5e-10
Score (logrank) test = 43.13  on 1 df,   p=5e-11
In [353]:
# Hazard Ratio 1mut vs. WT in t-MDS setting:
summary( coxph(as.formula(paste("Surv(os_sample_years,os_status)~","allelic_status")),data=tmp[tmp$allelic_status%in%c("WT","1mut") & tmp$mds_type=="therapy-related",]) )
Call:
coxph(formula = as.formula(paste("Surv(os_sample_years,os_status)~", 
    "allelic_status")), data = tmp[tmp$allelic_status %in% c("WT", 
    "1mut") & tmp$mds_type == "therapy-related", ])

  n= 172, number of events= 101 

                      coef exp(coef) se(coef)     z Pr(>|z|)
allelic_status1mut  0.2947    1.3427   0.3944 0.747    0.455
allelic_statusmulti     NA        NA   0.0000    NA       NA

                    exp(coef) exp(-coef) lower .95 upper .95
allelic_status1mut      1.343     0.7447    0.6199     2.909
allelic_statusmulti        NA         NA        NA        NA

Concordance= 0.514  (se = 0.016 )
Likelihood ratio test= 0.51  on 1 df,   p=0.5
Wald test            = 0.56  on 1 df,   p=0.5
Score (logrank) test = 0.56  on 1 df,   p=0.5

OS after HMAΒΆ

We consider OS after start of HMA therapy per TP53 allelic state.

HMA: hypomethylating agent

In [357]:
tmp = dd[which(dd$hma==1 & dd$allelic_status%in%c("WT","1mut","multi")),]
ff = as.formula(paste("Surv(os_hma_years,os_status)~","allelic_status"))
kmfit = survfit(ff,data=tmp)
kmfit$call$formula <- ff

myleg = paste0(gsub("allelic_status=","",names(kmfit$strata))," (N=",kmfit$n,")")
gg.hma.OS = ggsurvplot(kmfit, data=tmp,legend.title=c("TP53 state"),legend.labs=myleg,
                                conf.int=F, size=2.5, xlim=c(0,7),break.time.by=1,
                                color="allelic_status", palette=c("#737373",col.status),
                                breaks=2,
                                font.x=30, font.y=30, font.tickslab=30, font.legend=30,
                                tables.col="allelic_status",   
                                risk.table=TRUE,risk.table.height=0.25,risk.table.fontsize=10,tables.y.text=FALSE#,tables.theme=theme_cleantable()
                               )
gg.hma.OS.plot = gg.hma.OS$plot + theme(legend.key.width = unit(2,"cm")) + ylab("Probability of overall survival") + xlab("Years since HMA")
gg.hma.OS.table = gg.hma.OS$table + ylab("\n") + noleg + theme(axis.title.x=element_blank(),axis.text.x=element_blank()) + noleg + theme(axis.title.y = element_text(size = 30))
pt.hma.os = pairwise_survdiff(formula=ff, data=tmp, p.adjust.method = "none")

pvvb = pt.hma.os$p.value[2,2]
gg.hma.OS.plot = gg.hma.OS.plot  + 
annotate("segment",color=col.status[1],x=4,xend=4.5,y=1,yend=1,size=1.3) + 
annotate("text",label="vs",y=1,x=5,size=7) +
annotate("segment",color=col.status[2],x=5.5,xend=6,y=1,yend=1,size=1.3) +
annotate("text",label=paste("p=",signif(pvvb,2)),x=6.5,y=1,size=7)
Warning message:
β€œVectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.”
In [356]:
options(repr.plot.width=15, repr.plot.height=10)
ggHMA = ggarrange(gg.hma.OS.plot , gg.hma.OS.table+theme(plot.margin = unit(c(0, 0, 0.5, 0.15), "cm")), 
          nrow=2, ncol=1,
          heights=c(3,1)
         )
ggHMA
In [358]:
# Pairwise Log-rank test p-values:
pt.hma.os$p.value
A matrix: 2 Γ— 2 of type dbl
WT1mut
1mut9.107089e-01 NA
multi1.187625e-200.0004337702
In [359]:
# Hazard Ratio multi vs. 1mut after HMA:
summary( coxph(as.formula(paste("Surv(os_sample_years,os_status)~","allelic_status")),data=tmp[tmp$allelic_status%in%c("1mut","multi"),]) )
Call:
coxph(formula = as.formula(paste("Surv(os_sample_years,os_status)~", 
    "allelic_status")), data = tmp[tmp$allelic_status %in% c("1mut", 
    "multi"), ])

  n= 145, number of events= 114 
   (1 observation deleted due to missingness)

                       coef exp(coef) se(coef)      z Pr(>|z|)    
allelic_status1mut  -1.1225    0.3255   0.2966 -3.785 0.000154 ***
allelic_statusmulti      NA        NA   0.0000     NA       NA    
---
Signif. codes:  0 β€˜***’ 0.001 β€˜**’ 0.01 β€˜*’ 0.05 β€˜.’ 0.1 β€˜ ’ 1

                    exp(coef) exp(-coef) lower .95 upper .95
allelic_status1mut     0.3255      3.072     0.182    0.5821
allelic_statusmulti        NA         NA        NA        NA

Concordance= 0.564  (se = 0.023 )
Likelihood ratio test= 18.19  on 1 df,   p=2e-05
Wald test            = 14.32  on 1 df,   p=2e-04
Score (logrank) test = 15.55  on 1 df,   p=8e-05

OS after Lenalidomid (for del5q patients)ΒΆ

We consider OS after start of Lenalidomid therapy per TP53 allelic state, restricted to del5q patients.

In [363]:
tmp = cbind(dd, ddcyto[,"del5q",drop=F])
hmp = tmp[tmp$lenalidomid==1 & tmp$del5q==1,]
hmp = hmp[which(dd$allelic_status%in%c("WT","1mut","multi")),]
table(hmp$lenalidomid,hmp$allelic_status,exclude=F)
      
         WT 1mut multi <NA>
  1      73   12    17    2
  <NA>    0    0     0 3196
In [364]:
ff = as.formula(paste("Surv(os_len_years,os_status)~","allelic_status"))
kmfit = survfit(ff,data=hmp)
kmfit$call$formula <- ff
myleg = paste0(gsub("allelic_status=","",names(kmfit$strata))," (N=",kmfit$n,")")
gg.len.OS.del5q = ggsurvplot(kmfit, data=hmp,legend.title=c("TP53 state"),legend.labs=myleg,
                                conf.int=F, size=2.5, xlim=c(0,7),break.time.by=1,
                                color="allelic_status", palette=c("#737373",col.status),
                                breaks=2,
                                font.x=30, font.y=30, font.tickslab=30, font.legend=30,
                                tables.col="allelic_status",   
                                risk.table=TRUE,risk.table.height=0.25,risk.table.fontsize=10,tables.y.text=FALSE #,tables.theme=theme_cleantable()
                               )
gg.len.OS.plot.del5q = gg.len.OS.del5q$plot + theme(legend.key.width = unit(2,"cm")) + ylab("Probability of overall survival") + xlab("Years since Lenalidomide")
gg.len.OS.table.del5q = gg.len.OS.del5q$table + ylab("\n") + noleg + theme(axis.title.x=element_blank(),axis.text.x=element_blank()) + noleg + theme(axis.title.y = element_text(size = 30))

pt.len.os = pairwise_survdiff(formula=ff, data=hmp, p.adjust.method = "none")

pvvb = pt.len.os$p.value[2,2]
gg.len.OS.plot.del5q = gg.len.OS.plot.del5q  + 
annotate("segment",color=col.status[1],x=4,xend=4.5,y=1,yend=1,size=1.3) + 
annotate("text",label="vs",y=1,x=5,size=7) +
annotate("segment",color=col.status[2],x=5.5,xend=6,y=1,yend=1,size=1.3) +
#annotate("text",label=expression(paste(p,"<",10^-4)),x=6.5,y=1,size=7)
annotate("text",label=paste("p","<",signif(pt.len.os$p.value[2,2],2)),x=6.5,y=1,size=7)
Warning message:
β€œVectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.”
In [365]:
options(repr.plot.width=15, repr.plot.height=10)
ggLENPT.del5q = ggarrange(gg.len.OS.plot.del5q , gg.len.OS.table.del5q+theme(plot.margin = unit(c(0, 0, 0.5, 0.15), "cm")), 
          nrow=2, ncol=1,
          heights=c(3,1)
         )
ggLENPT.del5q
In [366]:
#Pairwise p-values Log-Rank test:
pt.len.os$p.value
A matrix: 2 Γ— 2 of type dbl
WT1mut
1mut5.516979e-01 NA
multi1.104096e-093.113332e-05
In [367]:
# Hazard Ratio multi vs. 1mut after Lenalidomid on del5q patients:
summary( coxph(as.formula(paste("Surv(os_sample_years,os_status)~","allelic_status")),data=hmp[hmp$allelic_status%in%c("1mut","multi"),]) )
Call:
coxph(formula = as.formula(paste("Surv(os_sample_years,os_status)~", 
    "allelic_status")), data = hmp[hmp$allelic_status %in% c("1mut", 
    "multi"), ])

  n= 29, number of events= 20 

                        coef exp(coef) se(coef)      z Pr(>|z|)   
allelic_status1mut  -2.41045   0.08978  0.77200 -3.122  0.00179 **
allelic_statusmulti       NA        NA  0.00000     NA       NA   
---
Signif. codes:  0 β€˜***’ 0.001 β€˜**’ 0.01 β€˜*’ 0.05 β€˜.’ 0.1 β€˜ ’ 1

                    exp(coef) exp(-coef) lower .95 upper .95
allelic_status1mut    0.08978      11.14   0.01977    0.4077
allelic_statusmulti        NA         NA        NA        NA

Concordance= 0.718  (se = 0.048 )
Likelihood ratio test= 15.28  on 1 df,   p=9e-05
Wald test            = 9.75  on 1 df,   p=0.002
Score (logrank) test = 14.33  on 1 df,   p=2e-04

OS after HSCTΒΆ

We consider OS following HSCT per TP53 allelic state.

HSCT: hematopoietic stem cell transplantation.

In [368]:
table(dd$allelic_status, dd$transplant)
       
           0    1
  WT    2641  281
  1mut   118    7
  multi  223   30
In [370]:
options(repr.plot.width=15, repr.plot.height=8)
tmp = dd[which(dd$transplant==1 & dd$allelic_status%in%c("WT","1mut","multi")),]
ff = as.formula(paste("Surv(os_transplant_years,os_status)~","allelic_status"))
kmfit = survfit(ff,data=tmp)
kmfit$call$formula <- ff
myleg = paste0(gsub("allelic_status=","",names(kmfit$strata))," (N=",kmfit$n,")")
gg.transplant.OS = ggsurvplot(kmfit, data=tmp,legend.title=c("TP53 state"),legend.labs=myleg,
                                conf.int=F, size=2.5, xlim=c(0,7),break.time.by=1,
                                color="allelic_status",palette=c("#737373",col.status),
                                font.x=30, font.y=30, font.tickslab=30, font.legend=30,
                                tables.col="allelic_status",   
                                risk.table=TRUE,risk.table.height=0.25,risk.table.fontsize=10,tables.y.text=FALSE
                               )
gg.transplant.OS.plot = gg.transplant.OS$plot + theme(legend.key.width = unit(2,"cm")) + ylab("Probability of overall survival") + 
xlab("Years since HSCT")
gg.transplant.OS.table = gg.transplant.OS$table + ylab("\n") + noleg + theme(axis.title.x=element_blank(),axis.text.x=element_blank()) + noleg + theme(axis.title.y = element_text(size = 30))

pt.transplant.os = pairwise_survdiff(formula=ff, data=tmp, p.adjust.method = "none")
pvva = pt.transplant.os$p.value[1,1]
pvvb = pt.transplant.os$p.value[2,2]
gg.transplant.OS.plot = gg.transplant.OS.plot  + 
annotate("segment",color=col.status[1],x=3.5,xend=4,y=1,yend=1,size=1.3) + 
annotate("text",label="vs",y=1,x=4.5,size=7) +
annotate("segment",color=col.status[2],x=5,xend=5.5,y=1,yend=1,size=1.3) +
annotate("text",label=paste("p=",signif(pvvb,2)),x=6,y=1,size=7)
Warning message:
β€œVectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.”
In [371]:
options(repr.plot.width=15, repr.plot.height=10)
ggTPLPT = ggarrange(gg.transplant.OS.plot , gg.transplant.OS.table+theme(plot.margin = unit(c(0, 0, 0.5, 0.15), "cm")), 
          nrow=2, ncol=1,
          heights=c(3,1)
         )
ggTPLPT
In [372]:
# Pairwise Log-Rank test p-values:
pt.transplant.os$p.value
A matrix: 2 Γ— 2 of type dbl
WT1mut
1mut0.9472191576 NA
multi0.00030637860.09928299
In [373]:
# Hazard Ratio multi vs. 1mut following HSCT:
summary( coxph(as.formula(paste("Surv(os_sample_years,os_status)~","allelic_status")),data=tmp[tmp$allelic_status%in%c("1mut","multi"),]) )
Call:
coxph(formula = as.formula(paste("Surv(os_sample_years,os_status)~", 
    "allelic_status")), data = tmp[tmp$allelic_status %in% c("1mut", 
    "multi"), ])

  n= 37, number of events= 24 

                       coef exp(coef) se(coef)      z Pr(>|z|)  
allelic_status1mut  -1.2741    0.2797   0.6362 -2.003   0.0452 *
allelic_statusmulti      NA        NA   0.0000     NA       NA  
---
Signif. codes:  0 β€˜***’ 0.001 β€˜**’ 0.01 β€˜*’ 0.05 β€˜.’ 0.1 β€˜ ’ 1

                    exp(coef) exp(-coef) lower .95 upper .95
allelic_status1mut     0.2797      3.576   0.08038    0.9731
allelic_statusmulti        NA         NA        NA        NA

Concordance= 0.577  (se = 0.053 )
Likelihood ratio test= 5.24  on 1 df,   p=0.02
Wald test            = 4.01  on 1 df,   p=0.05
Score (logrank) test = 4.47  on 1 df,   p=0.03

More: OS across all TP53 subgroupsΒΆ

We look at the OS across all TP53 subgroups, and show that all the multi-hit categories have a very poor survival.

In [374]:
# Outcome
# OS
ff = as.formula(paste("Surv(os_sample_years,os_status)~","detail_status"))
kmfit = survfit(ff,data=dd)
kmfit$call$formula <- ff
myleg = paste0(gsub("detail_status=","",names(kmfit$strata))," (N=",kmfit$n,")")
ggs = ggsurvplot(kmfit, data=dd, legend.title="",legend.labs=myleg,
                 conf.int=F, size=1.5, xlim=c(0,15),break.time.by=5,
                 font.x=18, font.y=18, font.tickslab=18, font.legend=18
                )$plot +
guides(colour = guide_legend(nrow = 2)) + theme(legend.key.width = unit(1.1,"cm")) + 
scale_color_manual(values=c("#737373",col.subtypes)) + xlab("Years")
ptos = pairwise_survdiff(formula=ff, data=dd, p.adjust.method = "none")
ggOS = ggs +
ylab("Probability of overall survival") + nolegtitle +
annotate("segment",color=col.subtypes[2],x=8,xend=9,y=1,yend=1,size=1.3) +
annotate("text",label=paste("p","<",signif(ptos$p.value[2,2],2),"                            "),x=12.2,y=1,size=5) + 
#
annotate("segment",color=col.status[1],x=6,xend=7,y=0.9,yend=0.9,size=1.3) + 
annotate("text",label="vs",y=0.9,x=7.5,size=5) +
annotate("segment",color=col.subtypes[3],x=8,xend=9,y=0.9,yend=0.9,size=1.3) +
annotate("text",label=paste("p","<",signif(ptos$p.value[3,2],2)," by Log-Rank test"),x=12.3,y=0.9,size=5) + 
#
annotate("segment",color=col.subtypes[4],x=8,xend=9,y=0.8,yend=0.8,size=1.3) +
annotate("text",label=paste("p","<",signif(ptos$p.value[4,2],2),"                            "),x=12.2,y=0.8,size=5)
In [375]:
options(repr.plot.width=10, repr.plot.height=7)
ggOS
In [376]:
# Pairwise Log-Rank test p-values:
ptos$p.value
A matrix: 4 Γ— 4 of type dbl
WT1mut>1mutmut+del
1mut1.645970e-01 NA NA NA
>1mut2.170058e-324.617282e-10 NA NA
mut+del3.937558e-507.244185e-150.06684358 NA
mut+cnloh1.464461e-531.074335e-150.033027210.7446638

More: OS across VAF strata and TP53 allelic stateΒΆ

Below we perform optimal cut-point analysis of the VAF for OS.

In [464]:
tmp = dd[!is.na(dd$allelic_status),]
tmp$max_vaf = 0
tmp$max_vaf[which(tmp$allelic_status!="WT")] = sapply(tmp$LEUKID[which(tmp$allelic_status!="WT")], function(x) max(mm[mm$TARGET_NAME==x,"VAF"])) 

rescut = surv_cutpoint(data=tmp[tmp$allelic_status=="1mut",],
                       time = "os_sample_years", event = "os_status", variables="max_vaf",
                       minprop = 0.1, progressbar = TRUE)
rescut
rescut2 = surv_cutpoint(data=tmp[tmp$allelic_status=="1mut" & tmp$max_vaf>rescut$cutpoint[1,1],], 
                       time = "os_sample_years", event = "os_status", variables="max_vaf",
                       minprop = 0.1, progressbar = TRUE)
rescut2
rescut3 = surv_cutpoint(data=tmp[tmp$allelic_status=="1mut" & tmp$max_vaf<rescut$cutpoint[1,1],], 
                       time = "os_sample_years", event = "os_status", variables="max_vaf",
                       minprop = 0.1, progressbar = TRUE)
rescut3
        cutpoint statistic
max_vaf    0.218  2.945483
        cutpoint statistic
max_vaf   0.2283  1.557839
        cutpoint statistic
max_vaf    0.114  1.495633
In [465]:
cutlow=0.11
cutup=0.22
In [466]:
tmp$vaf_cat0 = ""
tmp$vaf_cat0[tmp$allelic_status!="WT" & tmp$max_vaf<=cutlow] = "VAF<=11%"
tmp$vaf_cat0[tmp$allelic_status!="WT" & tmp$max_vaf>cutlow & tmp$max_vaf<=cutup] = "11%<VAF<=22%"
tmp$vaf_cat0[tmp$allelic_status!="WT" & tmp$max_vaf>cutup] = "VAF>22%"

tmp$vaf_cat11 = as.vector(tmp$allelic_status)
tmp$vaf_cat11[which(tmp$allelic_status!="WT")] = paste(as.vector(tmp$allelic_status[which(tmp$allelic_status!="WT")]),
                                                       tmp$vaf_cat0[which(tmp$allelic_status!="WT")],sep=", ")
tmp$vaf_cat11 = factor(tmp$vaf_cat11, levels=unique(tmp$vaf_cat11)[c(1,3,5,4,6,7,2)])

tmp$vaf_catgo = as.vector(tmp$vaf_cat11)
tmp$vaf_catgo[which(tmp$allelic_status=="1mut" & tmp$vaf_cat0!="VAF>22%")] = "1mut, VAF<=22%"
tmp$vaf_catgo = factor(tmp$vaf_catgo, levels=unique(tmp$vaf_catgo)[c(1,3,4,5,6,2)])

table(tmp$vaf_catgo)
                 WT      1mut, VAF<=22%       1mut, VAF>22%     multi, VAF<=11% 
               2922                  87                  38                  20 
multi, 11%<VAF<=22%      multi, VAF>22% 
                 21                 212 
In [489]:
ff = as.formula(paste("Surv(os_sample_years,os_status)~","vaf_catgo"))
kmfit = survfit(ff,data=tmp)
kmfit$call$formula <- ff
myleg = paste0(gsub("vaf_catgo=","",names(kmfit$strata))," (N=",kmfit$n,")")
gvafosmain = ggsurvplot(kmfit, data=tmp, legend.title="",conf.int=F, size=2.8, xlim=c(0,15),breaks=5, pval=F,
           font.x=25, font.y=25, font.tickslab=25, font.legend=20,legend.lab=myleg,
          )$plot +
guides(colour = guide_legend(nrow = 3)) + themePP + topleg +
theme(legend.key.width = unit(1.5,"cm")) + theme(legend.key.size = unit(1.0, "cm")) + 
xlab("Years") + ylab("Probability of overall survival") +
#scale_color_manual(values=c("#737373","#f16913","#a63603","#41b6c4","#225ea8","#253494"),labels=myleg2) + theme(legend.text.align = 0)
scale_color_manual(values=c("#737373","#fec44f","#d95f0e","#7fcdbb","#1d91c0","#0c2c84"),labels=myleg) + 
theme(legend.text.align = 0) + 
theme(legend.text=element_text(size=26))

pt = pairwise_survdiff(ff,data=tmp,p.adjust.method = "none")$p.value

pt
A matrix: 5 Γ— 5 of type dbl
WT1mut, VAF<=22%1mut, VAF>22%multi, VAF<=11%multi, 11%<VAF<=22%
1mut, VAF<=22% 6.167687e-01 NA NA NA NA
1mut, VAF>22% 7.357181e-051.238208e-03 NA NA NA
multi, VAF<=11% 2.955714e-066.788577e-052.232881e-01 NA NA
multi, 11%<VAF<=22% 4.534418e-069.853868e-054.188731e-010.74033481 NA
multi, VAF>22%8.424494e-1106.728363e-194.449103e-050.095997210.02425504
In [490]:
options(repr.plot.width=13, repr.plot.height=10)

gvafosmain = gvafosmain + 
annotate("segment",color="#d95f0e",x=8,xend=9.5,y=1,yend=1,size=1.8) + 
annotate("text",label="vs",y=1,x=10.2,size=10) +
annotate("segment",color="#737373",x=10.7,xend=12.2,y=1,yend=1,size=1.8) +
annotate("text",label=paste("p = ",signif(pt[2,1],2)),x=13.85,y=1,size=10) + 
#
annotate("segment",color="#7fcdbb",x=8,xend=9.5,y=0.9,yend=0.9,size=1.8) + 
annotate("text",label="vs",y=0.9,x=10.2,size=10) +
annotate("segment",color="#737373",x=10.7,xend=12.2,y=0.9,yend=0.9,size=1.8) +
annotate("text",label=paste("p = ",signif(pt[3,1],2)),x=13.8,y=0.9,size=10) + 
#
annotate("segment",color="#0c2c84",x=8,xend=9.5,y=0.8,yend=0.8,size=1.8) + 
annotate("text",label="vs",y=0.8,x=10.2,size=10) +
annotate("segment",color="#d95f0e",x=10.7,xend=12.2,y=0.8,yend=0.8,size=1.8) +
annotate("text",label=paste("p = ",signif(pt[5,3],2)),x=13.85,y=0.8,size=10)

gvafosmain

More : WHO distribution and OS per TP53 allelic stateΒΆ

We look at the representation of WHO subgroups per TP53 allelic state and the OS across the WHO subgroups.

In [402]:
golevels = c("t-MDS","MDS-del5q","MDS-SLD","MDS-RS-SLD","MDS-MLD","MDS-RS-MLD","MDS/MPN-RS-T","MDS-EB1","MDS-EB2","AML/AML-MRC","CMML","aCML","MDS/MPN-U","MDS-U","other")
dd$mywho <- dd$WHO_2016
dd$mywho[which(dd$WHO_2016=="MDS-SLD/MLD")] <- "MDS-SLD"
dd$mywho[which(dd$WHO_2016=="MDS-RS-SLD/MLD")] <- "MDS-RS-MLD"
dd$mywho[which(dd$MDS_TYPE=="therapy_related")] <- "t-MDS"
dd$mywho = factor(dd$mywho, levels=golevels)
In [403]:
table(dd$allelic_status, dd$mywho,exclude=F)
       
        t-MDS MDS-del5q MDS-SLD MDS-RS-SLD MDS-MLD MDS-RS-MLD MDS/MPN-RS-T
  WT      162       109     263        228     544        180           39
  1mut     10        18       6          8      16          6            2
  multi    52         4       7          1      20         12            0
  <NA>      5         0       1          1       4          0            0
       
        MDS-EB1 MDS-EB2 AML/AML-MRC CMML aCML MDS/MPN-U MDS-U other <NA>
  WT        334     309         114  378   40        45    85     5   87
  1mut       24      15           6    7    1         4     1     0    1
  multi      45      72          24   10    2         1     3     0    0
  <NA>        1       5           4    2    0         0     1     0    0
In [404]:
options(repr.plot.width=20, repr.plot.height=7)
tmp = dd[which(dd$allelic_status%in%c("1mut","multi")),]
tmp$label = paste0(tmp$allelic_status,"\n","(N=",table(tmp$allelic_status)[tmp$allelic_status],")")
tmp$label = factor(tmp$label, levels=unique(tmp$label))
ggWHO = ggplot(tmp) + geom_bar(aes(x=label,fill=mywho),position="fill") + themePP + 
xlab("TP53 state") + 
scale_fill_manual(values=col.who,na.value="#737373") + ylab("Proportion of patients") + coord_flip() + labs(fill="WHO Subtypes")
ggWHO
In [405]:
table(tmp$)
          aCML    AML/AML-MRC           CMML      MDS-del5q      MDS-EB1/2 
             3             30             17             22            156 
MDS-RS-SLD/MLD    MDS-SLD/MLD          MDS-U   MDS/MPN-RS-T      MDS/MPN-U 
            27             49              4              2              5 
         t-MDS 
            62 
In [417]:
# KEEP CASES WHERE WE HAVE MORE THAN 5 CASES IN AT LEAST OF TP53 mono or bi
# WE WILL DO t-MDS SEPARATELY
tmp = dd
tmp = tmp[!tmp$WHO_2016_MERGE %in% c("t-MDS","aCML","MDS-U","MDS/MPN-RS-T","MDS/MPN-U","other") ,]
tmp$WHO_2016_MERGE <- factor(tmp$WHO_2016_MERGE, 
                             levels=c("MDS-del5q","MDS-SLD/MLD","MDS-RS-SLD/MLD","MDS-EB1/2","AML/AML-MRC","CMML")
)
In [420]:
options(repr.plot.width=20, repr.plot.height=12)
ff = as.formula(paste("Surv(os_sample_years,os_status)~","WHO_2016_MERGE + allelic_status"))
kmfit = survfit(ff,data=tmp)
kmfit$call$formula <- ff
ggwhoOS = ggsurvplot(kmfit, data=tmp, legend.title="", conf.int=F, size=1.6, xlim=c(0,15),color="allelic_status",
                     font.x=30, font.y=30, font.tickslab=30, font.legend=30,break.time.by=5,
                    )$plot +
scale_color_manual(values=c("darkgrey",col.status),name="TP53 state") + facet_wrap(~WHO_2016_MERGE,ncol=3) + 
theme(strip.text.x = element_text(size=30)) +
ylab("Probability of overall survival") + xlab("Years")

posgo1 = c()
posgo2 = c()
posgo3= c()
for (mmw in levels(tmp$WHO_2016_MERGE)) {
    pvos = pairwise_survdiff(formula=ff, data=tmp[tmp$allelic_status%in%c("1mut","multi")&tmp$WHO_2016_MERGE%in%mmw,], p.adjust.method = "none")
    pvos2 = pairwise_survdiff(formula=ff, data=tmp[tmp$allelic_status%in%c("1mut","WT")&tmp$WHO_2016_MERGE%in%mmw,], p.adjust.method = "none")
    pvos3 = pairwise_survdiff(formula=ff, data=tmp[tmp$allelic_status%in%c("multi","WT")&tmp$WHO_2016_MERGE%in%mmw,], p.adjust.method = "none")
    posgo1 = c(posgo1,signif(pvos$p.value[1,1],1))
    posgo2 = c(posgo2,signif(pvos2$p.value[1,1],1))
    posgo3 = c(posgo3,signif(pvos3$p.value[1,1],1))
}

ann_text = data.frame(WHO_2016_MERGE=levels(tmp$WHO_2016_MERGE),x=13.5,y=1,label=paste0("p=",posgo1))
ann_text2 = data.frame(WHO_2016_MERGE=levels(tmp$WHO_2016_MERGE),x=13.5,y=0.9,label=paste0("p=",posgo2)) 
ann_text3 = data.frame(WHO_2016_MERGE=levels(tmp$WHO_2016_MERGE),x=13.5,y=0.8,label=paste0("p=",posgo3))
ggwhoOS = ggwhoOS + 
# MONO | BI
annotate("segment",color=col.status[1],x=9,xend=10,y=1,yend=1,size=1.3) + 
annotate("text",label="vs",y=1,x=10.5,size=7) +
annotate("segment",color=col.status[2],x=11,xend=12,y=1,yend=1,size=1.3) +
geom_text(data=ann_text, aes(x=x,y=y,label=label),size=7) + 
#annotate("text",label=paste0("p=",posgo1),x=13.5,y=1,size=7) +
# MONO | WT
annotate("segment",color=col.status[1],x=9,xend=10,y=0.9,yend=0.9,size=1.3) + 
annotate("text",label="vs",y=0.9,x=10.5,size=7) +
annotate("segment",color="darkgrey",x=11,xend=12,y=0.9,yend=0.9,size=1.3) +
geom_text(data=ann_text2, aes(x=x,y=y,label=label),size=7) + 
#annotate("text",label=paste0("p=",posgo2),x=13.5,y=0.9,size=7)
# BI | WT
annotate("segment",color=col.status[2],x=9,xend=10,y=0.8,yend=0.8,size=1.3) + 
annotate("text",label="vs",y=0.8,x=10.5,size=7) +
annotate("segment",color="darkgrey",x=11,xend=12,y=0.8,yend=0.8,size=1.3) +
geom_text(data=ann_text3, aes(x=x,y=y,label=label),size=7)

ggwhoOS

More: IPSS-R distribution and OS per TP53 allelic stateΒΆ

We look at the representation of IPSS-R risk scores per TP53 allelic state and the OS across the IPSS-R strata.

In [422]:
# IPSSR
options(repr.plot.width=20, repr.plot.height=5)
tmp = dd[which(dd$allelic_status%in%c("1mut","multi")),]
tmp$label = paste0(tmp$allelic_status,"\n","(N=",table(tmp$allelic_status)[tmp$allelic_status],")")
tmp$label = factor(tmp$label, levels=unique(tmp$label))
aa$label = paste0(aa$allelic_status,"\n","(N=",table(aa$allelic_status)[aa$allelic_status],")")
ggIPSSR = ggplot(tmp) + geom_bar(aes(x=label,fill=IPSSR_CALCULATED),position="fill") + themePP + 
xlab("TP53 state") + 
scale_fill_manual(values=col.ipssr,na.value="#737373") + ylab("Proportion of patients") + coord_flip() + labs(fill="IPSS-R")
ggIPSSR
In [423]:
dd$my_ipssr = as.vector(dd$IPSSR_CALCULATED)
dd$my_ipssr[dd$IPSSR_CALCULATED%in%c("VERY-GOOD","GOOD")] = "(VERY)-GOOD"
dd$my_ipssr[dd$IPSSR_CALCULATED%in%c("VERY-POOR","POOR")] = "(VERY)-POOR"
dd$my_ipssr = factor(dd$my_ipssr, levels=c("(VERY)-GOOD","INT","(VERY)-POOR"))
In [424]:
options(repr.plot.width=18, repr.plot.height=6)
ff = as.formula(paste("Surv(os_sample_years,os_status)~","my_ipssr + allelic_status"))
kmfit = survfit(ff,data=dd)
kmfit$call$formula <- ff
ggipssrOS = ggsurvplot(kmfit, data=dd, legend.title="", conf.int=F, size=1.5, xlim=c(0,15),color="allelic_status",breaks=5,
                       font.x=30, font.y=30, font.tickslab=30, font.legend=30
                      )$plot +
theme(strip.text.x = element_text(size=30)) +
scale_color_manual(values=c("darkgrey",col.status),name="TP53 state") + facet_wrap(~my_ipssr,ncol=3) +
ylab("Probability of overall survival") + xlab("Years")

posgo1 = c()
posgo2 = c()
posgo3 = c()
for (mmw in levels(dd$my_ipssr)) {
    pvos = pairwise_survdiff(formula=ff, data=dd[dd$allelic_status%in%c("1mut","multi")& dd$my_ipssr%in%mmw,], p.adjust.method = "none")
    pvos2 = pairwise_survdiff(formula=ff, data=dd[dd$allelic_status%in%c("1mut","WT")& dd$my_ipssr%in%mmw,], p.adjust.method = "none")
    pvos3 = pairwise_survdiff(formula=ff, data=dd[dd$allelic_status%in%c("multi","WT")& dd$my_ipssr%in%mmw,], p.adjust.method = "none")
    posgo1 = c(posgo1,signif(pvos$p.value[1,1],1))
    posgo2 = c(posgo2,signif(pvos2$p.value[1,1],1))
    posgo3 = c(posgo3,signif(pvos3$p.value[1,1],1))
}

ann_text = data.frame(my_ipssr=levels(dd$my_ipssr),x=13.8,y=1,label=paste0("p=",posgo1))
ann_text2 = data.frame(my_ipssr=levels(dd$my_ipssr),x=13.4,y=0.9,label=paste0("p=",posgo2)) 
ann_text3 = data.frame(my_ipssr=levels(dd$my_ipssr),x=13.8,y=0.8,label=paste0("p=",posgo3)) 
ggipssrOS = ggipssrOS + 
# MONO | BI
annotate("segment",color=col.status[1],x=9,xend=10,y=1,yend=1,size=1.3) + 
annotate("text",label="vs",y=1,x=10.5,size=7) +
annotate("segment",color=col.status[2],x=11,xend=12,y=1,yend=1,size=1.3) +
geom_text(data = ann_text, aes(x=x,y=y,label=label),size=7) +
#annotate("text",label=paste0("p=",posgo1),x=13.6,y=1,size=7) #+
# MONO | WT
annotate("segment",color=col.status[1],x=9,xend=10,y=0.9,yend=0.9,size=1.3) + 
annotate("text",label="vs",y=0.9,x=10.5,size=7) +
annotate("segment",color="darkgrey",x=11,xend=12,y=0.9,yend=0.9,size=1.3) +
#annotate("text",label=paste0("p=",posgo2),x=13.6,y=0.9,size=7)
geom_text(data = ann_text2, aes(x=x,y=y,label=label),size=7) +
# BI | WT
annotate("segment",color=col.status[2],x=9,xend=10,y=0.8,yend=0.8,size=1.3) + 
annotate("text",label="vs",y=0.8,x=10.5,size=7) +
annotate("segment",color="darkgrey",x=11,xend=12,y=0.8,yend=0.8,size=1.3) +
#annotate("text",label=paste0("p=",posgo2),x=13.6,y=0.9,size=7)
geom_text(data = ann_text3, aes(x=x,y=y,label=label),size=7)

ggipssrOS

More: Interaction with complex karyotypeΒΆ

We investigate the interaction between compplex karyotype (CK) and TP53 allelic state.

CK is defined as 3 independent chromosomal alterations (excluding cnloh).

In [425]:
table(dd$tp53,dd$complex)
     
      complex non-complex
  MUT     247         131
  WT       82        2864

Below the interaction in OS between CK and TP53 mutated or WT.

In [426]:
options(repr.plot.width=12, repr.plot.height=7)
ff = as.formula(paste("Surv(os_sample_years,os_status)~","complex + tp53"))
kmfit = survfit(ff,data=dd)
kmfit$call$formula <- ff
#myleg = paste0(gsub("allelic_status=","",names(kmfit$strata))," (N=",kmfit$n,")")
ggckeasy = ggsurvplot(kmfit, data=dd, legend.title="",conf.int=F, size=2.1, xlim=c(0,15),linetype="complex",color="tp53",breaks=5,
                      font.x=25, font.y=25, font.tickslab=25, font.legend=22,
                     )$plot +
scale_color_manual(values=rev(pal.wt.mut),name="TP53",labels=c("WT","MUT"))  + 
scale_linetype_manual(values=c("solid","dashed"),name="Complex karyotype") + theme(legend.key.width = unit(2.3,"cm")) +
xlab("Years") + ylab("Probability of overall survival")
ggckeasy
pt.ck.wt.mut = pairwise_survdiff(ff,data=dd,p.adjust.method = NULL)$p.value
In [427]:
# Pairwise Log-Rank test p-values:
pt.ck.wt.mut
A matrix: 3 Γ— 3 of type dbl
complex=complex, tp53=MUTcomplex=complex, tp53=WT complex=non-complex, tp53=MUT
complex=complex, tp53=WT 1.796802e-06 NA NA
complex=non-complex, tp53=MUT 1.858600e-201.392767e-03 NA
complex=non-complex, tp53=WT 7.347668e-1192.815284e-090.09293486

Same below but only from the CK cases, TP53 mutated or wild-type:

In [428]:
options(repr.plot.width=12, repr.plot.height=7)
tmp = dd[dd$complex=="complex",]
tmp$var= "TP53 WT & complex"
tmp$var[tmp$tp53=="MUT"] = "TP53 MUT & complex"
tmp$var = factor(tmp$var, levels=c("TP53 WT & complex","TP53 MUT & complex"))
ff = as.formula(paste("Surv(os_sample_years,os_status)~","var"))
kmfit = survfit(ff,data=tmp)
kmfit$call$formula <- ff

myleg = paste0(gsub("var=","",names(kmfit$strata))," (N=",kmfit$n,")")
ggckeasy2 = ggsurvplot(kmfit, data=tmp, legend.title="",conf.int=F, size=2.1, xlim=c(0,15),color="var",breaks=5,
                      font.x=25, font.y=25, font.tickslab=25, font.legend=22,
                      legend.labs=myleg,pval.coord = c(0, 0.03),pval.size=7,
                       pval=T,
                     )$plot +
scale_color_manual(values=c("#c994c7","#7a0177"),name="")  + 
theme(legend.key.width = unit(1.5,"cm")) +
xlab("Years") + ylab("Probability of overall survival")
ggckeasy2

We now look at the interaction between CK and TP53 allelic state.

First below the number/proportions of cases with or without CK per TP53 allelic state.

In [429]:
table(dd$allelic_status, dd$complex)
       
        complex non-complex
  WT         71        2851
  1mut       16         109
  multi     231          22
In [438]:
options(repr.plot.width=6, repr.plot.height=5)
ggckstate = ggplot(dd[which(dd$allelic_status%in%c("1mut","multi")),]) + 
geom_bar(aes(x=allelic_status, alpha=complex, fill=allelic_status, color=allelic_status)) + 
themePP + topleg  +
xlab("TP53 state") + ylab("Number of patients") + nolegtitle + 
scale_fill_manual(values=col.status,guide=FALSE) + 
scale_color_manual(values=col.status,guide=FALSE) + scale_alpha_manual(values=c(0.3,1))
ggckstate

Kaplan Meier probability estimates of OS across cases with ot without CK and per TP53 allelic states.

In [439]:
options(repr.plot.width=16, repr.plot.height=9)
ff = as.formula(paste("Surv(os_sample_years,os_status)~","complex + allelic_status"))
kmfit = survfit(ff,data=dd)
kmfit$call$formula <- ff
ggckstateos = ggsurvplot(kmfit, data=dd, legend.title="",conf.int=F, size=2.1, xlim=c(0,15),linetype="complex",color="allelic_status",breaks=5,
                      font.x=25, font.y=25, font.tickslab=25, font.legend=22,
                     )$plot +
scale_color_manual(values=c("darkgrey",col.status),name="TP53")  + 
scale_linetype_manual(values=c("solid","dashed"),name="Complex karyotype") + theme(legend.key.width = unit(2.1,"cm")) +
xlab("Years") + ylab("Probability of overall survival")
ptckstate = pairwise_survdiff(ff, data=dd, p.adjust.method = "none")$p.value
ggckstateos
In [440]:
# Pairwise Log-Rank test p-values:
pairwise_survdiff(ff, data=dd, p.adjust.method = "none")$p.value
A matrix: 5 Γ— 5 of type dbl
complex=complex, allelic_status=WT complex=complex, allelic_status=1mut complex=complex, allelic_status=multicomplex=non-complex, allelic_status=WT complex=non-complex, allelic_status=1mut
complex=complex, allelic_status=1mut 9.777291e-01 NA NA NA NA
complex=complex, allelic_status=multi1.387361e-070.014606007 NA NA NA
complex=non-complex, allelic_status=WT 1.293410e-080.0062659291.881623e-126 NA NA
complex=non-complex, allelic_status=1mut 6.074428e-040.052117084 5.155703e-210.39152367 NA
complex=non-complex, allelic_status=multi3.900838e-010.367555608 4.571781e-050.010732340.09852355
In [441]:
# Hazard Ratio multi vs. 1mut on cases with CK:
summary( coxph(as.formula(paste("Surv(os_sample_years,os_status)~","allelic_status")),data=dd[dd$allelic_status%in%c("1mut","multi") & dd$complex=="complex",]) )
Call:
coxph(formula = as.formula(paste("Surv(os_sample_years,os_status)~", 
    "allelic_status")), data = dd[dd$allelic_status %in% c("1mut", 
    "multi") & dd$complex == "complex", ])

  n= 242, number of events= 194 
   (5 observations deleted due to missingness)

                       coef exp(coef) se(coef)      z Pr(>|z|)  
allelic_status1mut  -0.8165    0.4420   0.3437 -2.375   0.0175 *
allelic_statusmulti      NA        NA   0.0000     NA       NA  
---
Signif. codes:  0 β€˜***’ 0.001 β€˜**’ 0.01 β€˜*’ 0.05 β€˜.’ 0.1 β€˜ ’ 1

                    exp(coef) exp(-coef) lower .95 upper .95
allelic_status1mut      0.442      2.262    0.2253    0.8669
allelic_statusmulti        NA         NA        NA        NA

Concordance= 0.521  (se = 0.012 )
Likelihood ratio test= 7.23  on 1 df,   p=0.007
Wald test            = 5.64  on 1 df,   p=0.02
Score (logrank) test = 5.96  on 1 df,   p=0.01
In [442]:
# Hazard Ratio multi vs. 1mut on cases without CK:
summary( coxph(as.formula(paste("Surv(os_sample_years,os_status)~","allelic_status")),data=dd[dd$allelic_status%in%c("1mut","multi") & dd$complex=="non-complex",]) )
Call:
coxph(formula = as.formula(paste("Surv(os_sample_years,os_status)~", 
    "allelic_status")), data = dd[dd$allelic_status %in% c("1mut", 
    "multi") & dd$complex == "non-complex", ])

  n= 126, number of events= 69 
   (5 observations deleted due to missingness)

                       coef exp(coef) se(coef)      z Pr(>|z|)
allelic_status1mut  -0.4866    0.6147   0.2973 -1.637    0.102
allelic_statusmulti      NA        NA   0.0000     NA       NA

                    exp(coef) exp(-coef) lower .95 upper .95
allelic_status1mut     0.6147      1.627    0.3433     1.101
allelic_statusmulti        NA         NA        NA        NA

Concordance= 0.536  (se = 0.028 )
Likelihood ratio test= 2.45  on 1 df,   p=0.1
Wald test            = 2.68  on 1 df,   p=0.1
Score (logrank) test = 2.73  on 1 df,   p=0.1
In [443]:
# Cox model of OS with TP53 allelic state and complex karyotype as covariates:
summary( coxph(as.formula(paste("Surv(os_sample_years,os_status)~","allelic_status + complex")),data=dd ))
Call:
coxph(formula = as.formula(paste("Surv(os_sample_years,os_status)~", 
    "allelic_status + complex")), data = dd)

  n= 3148, number of events= 1529 
   (176 observations deleted due to missingness)

                       coef exp(coef) se(coef)      z Pr(>|z|)    
allelic_status1mut   0.1078    1.1138   0.1302  0.828    0.408    
allelic_statusmulti  0.8632    2.3707   0.1265  6.826 8.72e-12 ***
complexnon-complex  -0.8652    0.4210   0.1161 -7.450 9.31e-14 ***
---
Signif. codes:  0 β€˜***’ 0.001 β€˜**’ 0.01 β€˜*’ 0.05 β€˜.’ 0.1 β€˜ ’ 1

                    exp(coef) exp(-coef) lower .95 upper .95
allelic_status1mut      1.114     0.8978    0.8630    1.4377
allelic_statusmulti     2.371     0.4218    1.8503    3.0374
complexnon-complex      0.421     2.3754    0.3353    0.5286

Concordance= 0.581  (se = 0.006 )
Likelihood ratio test= 335.2  on 3 df,   p=<2e-16
Wald test            = 466.9  on 3 df,   p=<2e-16
Score (logrank) test = 567.7  on 3 df,   p=<2e-16

More: Interaction with mutation typesΒΆ

In [516]:
# Main Hotspots
main.hot = names(sort(table(mm$aa_number),decreasing=T))[1:4]
main.hot
mm$is_hotspot = "no"
mm$is_hotspot[which(mm$aa_number %in% main.hot)] = "yes"

mm$myeffect = gsub("synonymous_codon","splice",gsub("non_synonymous_codon","missense",gsub("_variant","",mm$EFFECT)))
mm$myeffect = gsub("_codon","",gsub("_site","",mm$myeffect))
mm$myeffect[mm$myeffect%in%c("stop_lost","stop_retained")] = "frameshift"
mm$myeffect = factor(mm$myeffect, levels=c("frameshift","stop_gained","splice","missense","inframe_gain","inframe_loss"))
mm$my_truncated = "truncated"
mm$my_truncated[mm$myeffect%in%c("missense","inframe_gain","inframe_loss")] = "not-truncated"
  1. '273'
  2. '248'
  3. '220'
  4. '175'
In [517]:
mm$effect_label = as.vector(mm$my_truncated)
mm$effect_label[mm$myeffect%in%c("missense","inframe_loss","inframe_gain") & mm$my_truncated%in%"not-truncated"] = "other-missense"
mm$effect_label[mm$myeffect=="missense" & mm$aa_number%in%main.hot] = "hotspot"
table(mm$effect_label,exclude=F)
mm$effect_label = factor(mm$effect_label, levels=c("truncated","hotspot","other-missense"))
       hotspot other-missense      truncated 
           104            264            118 
In [518]:
options(repr.plot.width=13, repr.plot.height=5)
mycoleffect = c("#e7298a","#88419d","darkgreen")
ggplot(mm) + geom_bar(aes(x=detail_status,fill=effect_label),position="fill") + themePP + coord_flip()  + 
scale_fill_manual(values=mycoleffect, labels=c("truncated ","hotspot (273, 248, 220, 175) ","other-missense")) + 
topleg + nolegtitle +
ylab("Proportion") + xlab("TP53 state")
In [519]:
tmp = aa[which(aa$detail_status%in%c("1mut","mut+del","mut+cnloh")),]
tmp$effect_label = as.vector(sapply(tmp$LEUKID, function(x) {y=unique(mm[mm$TARGET_NAME==x,"effect_label"])[1]}))
tmp.keep = tmp
# Ambiguous Cases
#E-H-105514-T1-1-D1-1
#E-H-105606-T1-1-D1-1
#E-H-105660-T1-1-D1-1
#E-H-105890-T1-1-D1-1
#E-H-110703-T1-1-D1-1
#E-H-116562-T1-1-D1-1
tmp$effect_label[tmp$LEUKID%in%c("E-H-105514-T1-1-D1-1","E-H-105606-T1-1-D1-1","E-H-105606-T1-1-D1-1","E-H-105660-T1-1-D1-1","E-H-105890-T1-1-D1-1","E-H-110703-T1-1-D1-1","E-H-116562-T1-1-D1-1")] = "ambiguous"
tmp = tmp[-which(tmp$effect_label=="ambiguous"),]
tmp$effect_label = factor(tmp$effect_label, c("truncated","hotspot","other-missense"))
tmp$effect_label2 = as.vector(tmp$effect_label)
tmp$effect_label2[tmp$effect_label=="hotspot"] = "hotspot (273, 248, 220, 175)"
tmp$effect_label2 = factor(tmp$effect_label2, levels=c("truncated","hotspot (273, 248, 220, 175)","other-missense"))
In [520]:
options(repr.plot.width=17, repr.plot.height=6)
ggplot(tmp,aes(x=detail_status,y=NUM_ALL_CHR_NO17,fill=detail_status)) + geom_boxplot() + 
facet_grid(.~effect_label2) +
themePP + noleg + scale_fill_manual(values=col.subtypes[-2]) + 
ylab("No. of unique chr. other than 17 \n with aberrations per patient") + xlab("\n TP53 state") + 
scale_y_continuous(breaks=c(0,5,10,15),limits=c(0,15.1)) + 
stat_compare_means(ref.group = "1mut",method="wilcox.test",size=6,label="p.format",label.y=14.5)
In [522]:
ff = as.formula(paste("Surv(os_sample_years,os_status)~","detail_status + effect_label"))
posgo1 = c()
posgo2 = c()
for (mmw in levels(tmp$effect_label)) {
    pvos = pairwise_survdiff(formula=ff, data=tmp[tmp$detail_status%in%c("1mut","mut+del")& tmp$effect_label%in%mmw,], p.adjust.method = "none")
    pvos2 = pairwise_survdiff(formula=ff, data=tmp[tmp$detail_status%in%c("1mut","mut+cnloh")& tmp$effect_label%in%mmw,], p.adjust.method = "none")
    posgo1 = c(posgo1,signif(pvos$p.value[1,1],1))
    posgo2 = c(posgo2,signif(pvos2$p.value[1,1],1))
}
In [525]:
ann_text = data.frame(effect_label=levels(tmp$effect_label),x=13.5,y=1,label=paste0("p=",posgo1))
ann_text2 = data.frame(effect_label=levels(tmp$effect_label),x=13.5,y=0.9,label=paste0("p=",posgo2))
In [526]:
# ---- #
rr = dd[which(dd$allelic_status=="WT" & dd$os_sample_years>0),]
rr$effect_label = "none"
ss = tmp
ss$effect_label = as.vector(ss$effect_label)
hmp = rbind(rr[,c("os_sample_years","os_status","detail_status","effect_label")],ss[,c("os_sample_years","os_status","detail_status","effect_label")])
table(hmp$effect_label)
# truncated
kmfit = survfit(ff,data=hmp[hmp$effect_label%in%c("none","truncated"),])
kmfit$call$formula <- ff
ggtypeosT = ggsurvplot(kmfit, data=hmp[hmp$effect_label%in%c("none","truncated"),], 
                      legend.title="",conf.int=F, size=1.3, xlim=c(0,15),color="detail_status",breaks=5,
                      font.x=25, font.y=25, font.tickslab=25, font.legend=25
                     )$plot +
theme(strip.text.x = element_text(size=25)) +
scale_color_manual(values=c("#737373",col.subtypes[-2]),name="TP53 state") + 
xlab("Years") + ylab("Probability of overall survival") + facet_grid(.~"truncated") + 
annotate("segment",color=col.subtypes[1],x=9,xend=10,y=1,yend=1,size=1.2) + 
annotate("text",label="vs",y=1,x=10.5,size=6) +
annotate("segment",color=col.subtypes[3],x=11,xend=12,y=1,yend=1,size=1.2) +
geom_text(data=ann_text[1,,drop=F], aes(x=x,y=y,label=label),size=6) + 
annotate("segment",color=col.subtypes[1],x=9,xend=10,y=0.9,yend=0.9,size=1.2) + 
annotate("text",label="vs",y=0.9,x=10.5,size=6) +
annotate("segment",color=col.subtypes[4],x=11,xend=12,y=0.9,yend=0.9,size=1.2) +
geom_text(data=ann_text2[1,,drop=F], aes(x=x,y=y,label=label),size=6) + noleg
# hotspot
kmfit = survfit(ff,data=hmp[hmp$effect_label%in%c("none","hotspot"),])
kmfit$call$formula <- ff
ggtypeosH = ggsurvplot(kmfit, data=hmp[hmp$effect_label%in%c("none","hotspot"),], 
                      legend.title="",conf.int=F, size=1.3, xlim=c(0,15),color="detail_status",breaks=5,
                      font.x=25, font.y=25, font.tickslab=25, font.legend=18
                     )$plot +
theme(strip.text.x = element_text(size=25)) +
scale_color_manual(values=c("#737373",col.subtypes[-2]),name="TP53 state") + 
xlab("Years") + ylab("Probability of overall survival") + facet_grid(.~"hotspot (273, 248, 220, 175)") + 
annotate("segment",color=col.subtypes[1],x=9,xend=10,y=1,yend=1,size=1.2) + 
annotate("text",label="vs",y=1,x=10.5,size=6) +
annotate("segment",color=col.subtypes[3],x=11,xend=12,y=1,yend=1,size=1.2) +
geom_text(data=ann_text[2,,drop=F], aes(x=x,y=y,label=label),size=6) + 
annotate("segment",color=col.subtypes[1],x=9,xend=10,y=0.9,yend=0.9,size=1.2) + 
annotate("text",label="vs",y=0.9,x=10.5,size=6) +
annotate("segment",color=col.subtypes[4],x=11,xend=12,y=0.9,yend=0.9,size=1.2) +
geom_text(data=ann_text2[2,,drop=F], aes(x=x,y=y,label=label),size=6)  + 
theme(legend.key.width = unit(1.2,"cm")) +  noleg + theme(axis.title.y=element_blank())
# other
kmfit = survfit(ff,data=hmp[hmp$effect_label%in%c("none","other-missense"),])
kmfit$call$formula <- ff
ggtypeosO = ggsurvplot(kmfit, data=hmp[hmp$effect_label%in%c("none","other-missense"),], 
                      legend.title="",conf.int=F, size=1.3, xlim=c(0,15),color="detail_status",breaks=5,
                      font.x=25, font.y=25, font.tickslab=25, font.legend=25
                     )$plot +
theme(strip.text.x = element_text(size=25)) + 
scale_color_manual(values=c("#737373",col.subtypes[-2]),name="TP53 state") + 
xlab("Years") + ylab("Probability of overall survival") + facet_grid(.~"other-missense") + 
annotate("segment",color=col.subtypes[1],x=9,xend=10,y=1,yend=1,size=1.2) + 
annotate("text",label="vs",y=1,x=10.5,size=6) +
annotate("segment",color=col.subtypes[3],x=11,xend=12,y=1,yend=1,size=1.2) +
geom_text(data=ann_text[2,,drop=F], aes(x=x,y=y,label=label),size=6) + 
annotate("segment",color=col.subtypes[1],x=9,xend=10,y=0.9,yend=0.9,size=1.2) + 
annotate("text",label="vs",y=0.9,x=10.5,size=6) +
annotate("segment",color=col.subtypes[4],x=11,xend=12,y=0.9,yend=0.9,size=1.2) +
geom_text(data=ann_text2[2,,drop=F], aes(x=x,y=y,label=label),size=6) + noleg + theme(axis.title.y=element_blank())
       hotspot           none other-missense      truncated 
            61           2757            162             59 
In [528]:
options(repr.plot.width=20, repr.plot.height=7)
ggarrange(ggtypeosT, 
                      ggtypeosH, 
                      ggtypeosO, 
                      ncol=3)

R sessionΒΆ

In [444]:
sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Mojave 10.14.6

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] stringr_1.4.0   cmprsk_2.2-9    survminer_0.4.6 ggpubr_0.3.0   
[5] survival_3.1-12 gridExtra_2.3   reshape2_1.4.4  ggplot2_3.3.0  
[9] colorout_1.2-2 

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.4        lattice_0.20-41   tidyr_1.0.3       zoo_1.8-8        
 [5] assertthat_0.2.1  digest_0.6.25     IRdisplay_0.7.0   R6_2.4.1         
 [9] cellranger_1.1.0  plyr_1.8.6        repr_1.1.0        backports_1.1.7  
[13] evaluate_0.14     pillar_1.4.4      rlang_0.4.6       curl_4.3         
[17] uuid_0.1-4        readxl_1.3.1      data.table_1.12.8 car_3.0-7        
[21] Matrix_1.2-18     labeling_0.3      splines_3.6.1     foreign_0.8-75   
[25] munsell_0.5.0     broom_0.5.6       compiler_3.6.1    xfun_0.13        
[29] pkgconfig_2.0.3   base64enc_0.1-3   htmltools_0.4.0   tidyselect_1.1.0 
[33] tibble_3.0.1      km.ci_0.5-2       rio_0.5.16        crayon_1.3.4     
[37] dplyr_0.8.5       withr_2.2.0       grid_3.6.1        xtable_1.8-4     
[41] nlme_3.1-147      jsonlite_1.6.1    gtable_0.3.0      lifecycle_0.2.0  
[45] magrittr_1.5      KMsurv_0.1-5      scales_1.1.1      zip_2.0.4        
[49] stringi_1.4.6     carData_3.0-3     farver_2.0.3      ggsignif_0.6.0   
[53] ellipsis_0.3.0    survMisc_0.5.5    generics_0.0.2    vctrs_0.3.0      
[57] cowplot_1.0.0     openxlsx_4.1.5    IRkernel_1.1      tools_3.6.1      
[61] forcats_0.5.0     glue_1.4.1        purrr_0.3.4       hms_0.5.3        
[65] abind_1.4-5       colorspace_1.4-1  rstatix_0.5.0     pbdZMQ_0.3-3     
[69] knitr_1.28        haven_2.2.0      
In [ ]: